├── docs ├── gAnswer_help.pdf ├── TKDE18_gAnswer.pdf ├── gAnswer_help_en.pdf └── TKDE18_gAnswer_supplementary.pdf ├── src ├── qa │ ├── Matches.java │ ├── mapping │ │ ├── EntityFragmentDict.java │ │ └── DBpediaLookup.java │ ├── Answer.java │ ├── Query.java │ ├── Globals.java │ ├── extract │ │ └── CorefResolution.java │ ├── parsing │ │ └── QuestionParsing.java │ └── GAnswer.java ├── fgmt │ ├── Fragment.java │ ├── VariableFragment.java │ ├── RelationFragment.java │ ├── TypeFragment.java │ └── EntityFragment.java ├── rdf │ ├── NodeSelectedWithScore.java │ ├── PredicateMapping.java │ ├── MergedWord.java │ ├── EntityMapping.java │ ├── TypeMapping.java │ ├── SemanticUnit.java │ ├── ImplicitRelation.java │ ├── SimpleRelation.java │ ├── SemanticQueryGraph.java │ ├── SemanticRelation.java │ ├── Sparql.java │ └── Triple.java ├── lcn │ ├── EntityNameAndScore.java │ ├── Main.java │ ├── EntityFragmentFields.java │ ├── SearchInEntityFragments.java │ ├── BuildIndexForTypeShortName.java │ ├── BuildIndexForEntityFragments.java │ └── SearchInTypeShortName.java ├── paradict │ └── PredicateIDAndSupport.java ├── nlp │ ├── tool │ │ ├── Main.java │ │ ├── NERecognizer.java │ │ ├── StanfordParser.java │ │ ├── MaltParser.java │ │ ├── MaltParserCon.java │ │ └── CoreNLP.java │ └── ds │ │ ├── Sentence.java │ │ ├── Word.java │ │ └── DependencyTreeNode.java ├── application │ ├── GinfoHandler.java │ ├── GanswerHttp.java │ ├── GanswerHttpConnector.java │ └── GanswerHandler.java ├── utils │ └── FileUtil.java ├── log │ └── QueryLogger.java └── addition │ ├── AggregationRecognition.java │ └── AddtionalFix.java ├── .gitignore ├── genrate_fragments ├── extra_get_basic_and_yago.py ├── step2_dedubplicate.py ├── step7_get_predicate_fragment.py ├── step6_get_type_fragment.py ├── step3_split.py ├── step4_triple_to_number.py ├── step1_clean_triple.py ├── step5_get_entity_fragment.py └── How_to_generate_fragments.md ├── LICENSE ├── README_CH.md └── README.md /docs/gAnswer_help.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pkumod/gAnswer/HEAD/docs/gAnswer_help.pdf -------------------------------------------------------------------------------- /docs/TKDE18_gAnswer.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pkumod/gAnswer/HEAD/docs/TKDE18_gAnswer.pdf -------------------------------------------------------------------------------- /docs/gAnswer_help_en.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pkumod/gAnswer/HEAD/docs/gAnswer_help_en.pdf -------------------------------------------------------------------------------- /docs/TKDE18_gAnswer_supplementary.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pkumod/gAnswer/HEAD/docs/TKDE18_gAnswer_supplementary.pdf -------------------------------------------------------------------------------- /src/qa/Matches.java: -------------------------------------------------------------------------------- 1 | package qa; 2 | 3 | public class Matches { 4 | public String[][] answers = null; 5 | public int answersNum = 0; 6 | public long time = 0; 7 | 8 | public static final int pageNum = 3000; 9 | } 10 | -------------------------------------------------------------------------------- /src/fgmt/Fragment.java: -------------------------------------------------------------------------------- 1 | package fgmt; 2 | 3 | 
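// Base class of the offline-built graph summaries ("fragments"): each entity, relation
// (predicate), type, and query variable gets one, tagged with its kind and a numeric id.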
public abstract class Fragment {
 4 | 	public enum typeEnum {ENTITY_FRAGMENT, RELATION_FRAGMENT, TYPE_FRAGMENT, VAR_FRAGMENT};
 5 | 
 6 | 	public typeEnum fragmentType;
 7 | 	public int fragmentId;
 8 | };
 9 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled class file
 2 | *.class
 3 | 
 4 | # Log file
 5 | *.log
 6 | 
 7 | # BlueJ files
 8 | *.ctxt
 9 | 
10 | # Mobile Tools for Java (J2ME)
11 | .mtj.tmp/
12 | 
13 | # Package Files #
14 | *.jar
15 | *.war
16 | *.nar
17 | *.ear
18 | *.zip
19 | *.tar.gz
20 | *.rar
21 | 
22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
23 | hs_err_pid*
--------------------------------------------------------------------------------
/src/rdf/NodeSelectedWithScore.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import java.util.ArrayList;
 4 | 
 5 | public class NodeSelectedWithScore implements Comparable<NodeSelectedWithScore>
 6 | {
 7 | 	public ArrayList<Integer> selected;
 8 | 	int size; //split key to st and ed
 9 | 	public double score = 0;
10 | 
11 | 	public NodeSelectedWithScore(ArrayList<Integer> a, double b)
12 | 	{
13 | 		selected = a;
14 | 		score = b;
15 | 	}
16 | 
17 | 	// In descending order: big --> small
18 | 	public int compareTo(NodeSelectedWithScore o) {
19 | 		double diff = this.score - o.score;
20 | 		if (diff > 0) return -1;
21 | 		else if (diff < 0) return 1;
22 | 		else return 0;
23 | 	}
24 | }
--------------------------------------------------------------------------------
/src/lcn/EntityNameAndScore.java:
--------------------------------------------------------------------------------
 1 | package lcn;
 2 | 
 3 | public class EntityNameAndScore implements Comparable<EntityNameAndScore> {
 4 | 	public int entityID;
 5 | 	public String entityName;
 6 | 	public double score;
 7 | 
 8 | 	public EntityNameAndScore(int id, String n, double s) {
 9 | 		entityID = id;
10 | 		entityName = n;
11 | 		score = s;
12 | 	}
13 | 
14 | 	@Override
15 | 	public String toString() {
16 | 		return entityID + ":<" + entityName + ">\t" + score;
17 | 	}
18 | 
19 | 	public int compareTo(EntityNameAndScore o) {
20 | 		if(this.score < o.score) {
21 | 			return 1;
22 | 		}
23 | 		else if (this.score > o.score) {
24 | 			return -1;
25 | 		}
26 | 		else {
27 | 			return 0;
28 | 		}
29 | 	}
30 | 
31 | }
32 | 
--------------------------------------------------------------------------------
/src/rdf/PredicateMapping.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | public class PredicateMapping implements Comparable<PredicateMapping> {
 4 | 	public int pid = -1;
 5 | 	public double score = 0;
 6 | 	public String parapharase = null;
 7 | 
 8 | 	public PredicateMapping (int pid, double sco, String para) {
 9 | 		this.pid = pid;
10 | 		score = sco;
11 | 		parapharase = para;
12 | 	}
13 | 
14 | 	// In descending order: big --> small
15 | 	public int compareTo(PredicateMapping o) {
16 | 		double diff = this.score - o.score;
17 | 		if (diff > 0) return -1;
18 | 		else if (diff < 0) return 1;
19 | 		else return 0;
20 | 	}
21 | 
22 | 	@Override
23 | 	public String toString() {
24 | 		String ret = "";
25 | 		ret = "<"+pid+" : "+parapharase+" : "+score+">";
26 | 		return ret;
27 | 	}
28 | }
29 | 
--------------------------------------------------------------------------------
/src/paradict/PredicateIDAndSupport.java:
--------------------------------------------------------------------------------
 1 | package paradict;
 2 | 
 3 | public class PredicateIDAndSupport implements Comparable<PredicateIDAndSupport> {
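// Pairs a predicate id with its support count in the paraphrase dictionary;
// compareTo sorts by support in descending order.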
 4 | 	public int predicateID;
 5 | 	public int support;
 6 | 	public double[] wordSelectivity = null; // wordSelectivity makes the ranking of PATTY patterns more accurate.
 7 | 
 8 | 	public PredicateIDAndSupport(int _pid, int _support, double[] _slct) {
 9 | 		predicateID = _pid;
10 | 		support = _support;
11 | 		wordSelectivity = _slct;
12 | 	}
13 | 
14 | 	public int compareTo(PredicateIDAndSupport o) {
15 | 		return o.support - this.support;
16 | 	}
17 | 
18 | 	// only used for the predicate itself and handwritten paraphrases
19 | 	public static double[] genSlct(int size) {
20 | 		double[] ret = new double[size];
21 | 		for (int i=0;i<size;i++)
22 | 			ret[i] = 1;
23 | 		return ret;
24 | 	}
25 | }
--------------------------------------------------------------------------------
/src/rdf/MergedWord.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import java.util.ArrayList;
 4 | 
 5 | import rdf.EntityMapping;
 6 | import rdf.TypeMapping;
 7 | 
 8 | public class MergedWord implements Comparable<MergedWord>
 9 | {
10 | 	//original position
11 | 	public int st,ed;
12 | 	//position after merge (unselected is -1)
13 | 	public int mergedPos = -1;
14 | 	public String name;
15 | 	public boolean mayCategory = false;
16 | 	public boolean mayLiteral = false;
17 | 	public boolean mayEnt = false;
18 | 	public boolean mayType = false;
19 | 	public ArrayList<EntityMapping> emList = null;
20 | 	public ArrayList<TypeMapping> tmList = null;
21 | 	public String category = null;
22 | 
23 | 	public MergedWord(int s,int e,String n)
24 | 	{
25 | 		st = s;
26 | 		ed = e;
27 | 		name = n;
28 | 	}
29 | 
30 | 	@Override
31 | 	//long to short
32 | 	public int compareTo(MergedWord o)
33 | 	{
34 | 		int lenDiff = (this.ed-this.st) - (o.ed-o.st);
35 | 
36 | 		if (lenDiff > 0) return -1;
37 | 		else if (lenDiff < 0) return 1;
38 | 		return 0;
39 | 	}
40 | 
41 | }
--------------------------------------------------------------------------------
/src/rdf/EntityMapping.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import fgmt.EntityFragment;
 4 | 
 5 | public class EntityMapping implements Comparable<EntityMapping> {
 6 | 	public int entityID = -1;
 7 | 	public String entityName = null;
 8 | 	public double score = 0;
 9 | 
10 | 	public EntityFragment entityFragment = null;
11 | 
12 | 	public EntityMapping(int eid, String en, double sco) {
13 | 		entityID = eid;
14 | 		entityName = en;
15 | 		score = sco;
16 | 
17 | 		// penalty if the entity name starts with "?"
18 | 		if (entityName.startsWith("?"))
19 | 			score *=0.5;
20 | 	}
21 | 
22 | 	// In descending order: big --> small
23 | 	public int compareTo(EntityMapping o) {
24 | 		double diff = this.score - o.score;
25 | 		if (diff > 0) return -1;
26 | 		else if (diff < 0) return 1;
27 | 		else return 0;
28 | 	}
29 | 
30 | 	public int hashCode()
31 | 	{
32 | 		return new Integer(entityID).hashCode();
33 | 	}
34 | 
35 | 	public String toString()
36 | 	{
37 | 		StringBuilder res = new StringBuilder(entityName+"("+score+")");
38 | 		return res.toString();
39 | 	}
40 | }
--------------------------------------------------------------------------------
/genrate_fragments/step7_get_predicate_fragment.py:
--------------------------------------------------------------------------------
 1 | #encoding=utf-8
 2 | en2t = {}
 3 | with open('input entity fragment','r') as f:
 4 |     for line in f:
 5 |         dou = line[:-1].split('\t')
 6 |         types = dou[1].replace('|','#').split('#')[4]
 7 |         typeset = types.split(',')
 8 |         en2t[dou[0]] = set()
 9 |         for t in typeset:
10 |             if len(t)<6 and t!='-1' and len(t)>0:
11 |                 en2t[dou[0]].add(t)
12 | sen = set()
13 | lisen = {}
14 | for i in range(408261): # iterate over every predicate
15 |     lisen['%d'%i] = set()
16 | 
17 | with open('triple file represented by ids here','r') as f:
18 |     i = 1
19 |     for line in f:
20 |         if i%100000==0:
21 |             print(i)
22 |         i += 1
23 |         tri = line[:-1].split('\t')
24 |         if tri[0]!='-1':
25 |             pre = '['+','.join(en2t[tri[0]])+']'
26 |         else:
27 |             pre = '[]'
28 |         if tri[2]!='-1':
29 |             pos = '['+','.join(en2t[tri[2]])+']\n'
30 |             s = pre + '\t' + tri[1] + '\t' + pos
31 |             sen.add(s)
32 |         else:
33 |             lisen[tri[1]].add(tri[0])
34 | 
35 | for k in lisen.keys():
36 |     s = '['+','.join(lisen[k])+']\t'+k+'\tliteral\n'
37 |     sen.add(s)
38 | 
39 | with open('output predicate fragment file','w') as f:
40 |     for item in sen:
41 |         f.write(item)
42 | print(len(sen))
43 | 
--------------------------------------------------------------------------------
/genrate_fragments/step6_get_type_fragment.py:
--------------------------------------------------------------------------------
 1 | #encoding=utf-8
 2 | en2t = {}
 3 | with open('input entity fragment file here','r') as f:
 4 |     for line in f:
 5 |         dou = line[:-1].split('\t')
 6 |         types = dou[1].replace('|','#').split('#')[4]
 7 |         typeset = types.split(',')
 8 |         en2t[dou[0]] = set()
 9 |         for t in typeset:
10 |             if len(t)<6 and t!='-1' and len(t)>0:
11 |                 en2t[dou[0]].add(t)
12 | print("en2t loaded\n")
13 | lisen = {}
14 | for i in range(26043): # iterate over every basic type
15 |     lisen['%d'%i] = [set(),set(),set()]
16 | 
17 | with open('triple file represented by ids here','r') as f:
18 |     i = 1
19 |     for line in f:
20 |         if i%100000 == 0:
21 |             print(i)
22 |         i += 1
23 |         tri = line[:-1].split('\t')
24 |         if tri[1]!='208518':
25 |             for t in en2t[tri[0]]:
26 |                 if len(t)<=5:
27 |                     lisen[t][1].add(tri[1])
28 |                     lisen[t][2].add(tri[0])
29 |             if tri[2]!='-1':
30 |                 for t in en2t[tri[2]]:
31 |                     if len(t)<=5:
32 |                         lisen[t][0].add(tri[1])
33 |                         lisen[t][2].add(tri[2])
34 | 
35 | with open('output type fragment','w') as f:
36 |     for k in lisen.keys():
37 |         f.write(k+'\t'+','.join(lisen[k][0])+'|'+','.join(lisen[k][1])+'|'+','.join(lisen[k][2])+'\n')
38 | print(len(lisen))
39 | 
--------------------------------------------------------------------------------
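step6 above serializes one fragment per basic type as typeId<TAB>inPredicates|outPredicates|entities, each field a comma-separated id list that may be empty. A minimal sketch of reading that layout on the Java side — class, method, and variable names here are illustrative assumptions, not gAnswer's actual TypeFragment API:

import java.util.HashSet;
import java.util.Set;

public class TypeFragmentLineSketch {
    // Illustrative only: split one line of step6's output ("typeId \t in|out|entities").
    static void parse(String line) {
        String[] kv = line.split("\t");
        String[] f = kv[1].split("\\|", -1);          // -1 keeps trailing empty fields
        Set<Integer> inPredicates  = toIdSet(f[0]);   // predicates whose objects carry this type
        Set<Integer> outPredicates = toIdSet(f[1]);   // predicates whose subjects carry this type
        Set<Integer> entities      = toIdSet(f[2]);   // entities labeled with this type
    }

    static Set<Integer> toIdSet(String csv) {
        Set<Integer> ids = new HashSet<Integer>();
        for (String s : csv.split(","))
            if (!s.isEmpty()) ids.add(Integer.parseInt(s));
        return ids;
    }
}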
/src/nlp/tool/Main.java:
--------------------------------------------------------------------------------
 1 | package nlp.tool;
 2 | 
 3 | import java.io.BufferedReader;
 4 | import java.io.IOException;
 5 | import java.io.InputStreamReader;
 6 | 
 7 | import nlp.ds.DependencyTree;
 8 | import nlp.ds.Sentence;
 9 | import qa.Globals;
10 | 
11 | public class Main {
12 | 	public static void main (String[] args) {
13 | 		Globals.init();
14 | 		BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
15 | 		try {
16 | 			while (true) {
17 | 				System.out.println("Test maltparser.");
18 | 				System.out.print("Please input the NL question: ");
19 | 				String question = br.readLine();
20 | 				if (question.length() <= 3)
21 | 					break;
22 | 				try {
23 | 					long t1 = System.currentTimeMillis();
24 | 					Sentence s = new Sentence(question);
25 | 					DependencyTree dt = new DependencyTree(s, Globals.stanfordParser);
26 | 					System.out.println("====StanfordDependencies====");
27 | 					System.out.println(dt);
28 | 					DependencyTree dt2 = new DependencyTree(s, Globals.maltParser);
29 | 					System.out.println("====MaltDependencies====");
30 | 					System.out.println(dt2);
31 | 					long t2 = System.currentTimeMillis();
32 | 					System.out.println("time=" + (t2-t1) + "ms");
33 | 				} catch (Exception e) {
34 | 					e.printStackTrace();
35 | 				}
36 | 			}
37 | 		} catch (IOException e) {
38 | 			e.printStackTrace();
39 | 		}
40 | 	}
41 | 
42 | }
--------------------------------------------------------------------------------
/src/qa/mapping/EntityFragmentDict.java:
--------------------------------------------------------------------------------
 1 | package qa.mapping;
 2 | 
 3 | import java.util.HashMap;
 4 | 
 5 | import fgmt.EntityFragment;
 6 | 
 7 | public class EntityFragmentDict {
 8 | 	//public HashMap<String, EntityFragment> entityFragmentDictionary = new HashMap<String, EntityFragment>();
 9 | 	public HashMap<Integer, EntityFragment> entityFragmentDictionary = new HashMap<Integer, EntityFragment>();
10 | 
11 | 	public EntityFragment getEntityFragmentByEid (Integer eid)
12 | 	{
13 | 		if (!entityFragmentDictionary.containsKey(eid))
14 | 		{
15 | 			entityFragmentDictionary.put(eid, EntityFragment.getEntityFragmentByEntityId(eid));
16 | 		}
17 | 		return entityFragmentDictionary.get(eid);
18 | 
19 | 	}
20 | 
21 | 	/*
22 | 	 * Old version, search by name
23 | 	 * */
24 | //	public EntityFragment getEntityFragmentByName (String name) {
25 | //		if (name.startsWith("?")) {
26 | //			return null;
27 | //		}
28 | //		if (!entityFragmentDictionary.containsKey(name)) {
29 | //			String fgmt = EntityFragment.getEntityFgmtStringByName(name);
30 | //			if (fgmt != null)
31 | //			{
32 | //				int eid = EntityFragmentFields.entityName2Id.get(name);
33 | //				entityFragmentDictionary.put(name, new EntityFragment(eid, fgmt));
34 | //			}
35 | //			else {
36 | //				entityFragmentDictionary.put(name, null);
37 | //			}
38 | //		}
39 | //		return entityFragmentDictionary.get(name);
40 | //
41 | //	}
42 | }
--------------------------------------------------------------------------------
/genrate_fragments/step3_split.py:
--------------------------------------------------------------------------------
 1 | # encoding=utf-8
 2 | '''
 3 | Step3: extract entities, types and predicates from the original triple files and allocate ids
 4 | '''
 5 | entities = set()
 6 | types = set()
 7 | predicate = set()
 8 | with open('triple file here','r') as f:
 9 |     i = 1
10 |     k = 0
11 |     for line in f.readlines():
12 |         tri = line[:-2].split('\t')
13 |         entities.add(tri[0])
14 |         predicate.add(tri[1])
15 |         if len(tri)==2:
16 |             print("%s:%d"%(line,i))
17 |             i += 1
18 |             k += 1
19 |             print(tri)
20 |             continue
21 |         if '"' in tri[2][0]:
22 |             continue
23 |         entities.add(tri[2])
24 |         if tri[1]=='<type>':
25 |             types.add(tri[2])
26 |         if i%10000 == 0:
27 |             print(i)
28 |         i += 1
29 | print(i)
30 | print(k)
31 | 
32 | e = open('entity id file','w')
33 | t = open('type id file','w')
34 | p = open('predicate id file','w')
35 | 
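# Each mapping file written below holds one "<name>\tid" pair per line, e.g. "<Berlin>\t0";
# the ids are separate counters per file, so an entity id and a predicate id may collide.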
36 | k = 0
37 | for item in entities:
38 |     if item[-1]!='\n':
39 |         e.write(item+'\t%d'%k+'\n')
40 |     else:
41 |         e.write(item[:-1]+'\t%d'%k+'\n')
42 |     k += 1
43 | 
44 | k = 0
45 | for item in types:
46 |     if item[-1]!='\n':
47 |         t.write(item+'\t%d'%k+'\n')
48 |     else:
49 |         t.write(item[:-1]+'\t%d'%k+'\n')
50 |     k += 1
51 | 
52 | k = 0
53 | for item in predicate:
54 |     if item[-1]!='\n':
55 |         p.write(item+'\t%d'%k+'\n')
56 |     else:
57 |         p.write(item[:-1]+'\t%d'%k+'\n')
58 |     k += 1
59 | 
--------------------------------------------------------------------------------
/genrate_fragments/step4_triple_to_number.py:
--------------------------------------------------------------------------------
 1 | #encoding=utf-8
 2 | '''
 3 | Step4: transform the triples, representing each entity, type and predicate by its id
 4 | '''
 5 | eid = {}
 6 | tid = {}
 7 | pid = {}
 8 | 
 9 | with open('entity id file here','r') as e:
10 |     for line in e:
11 |         dub = line[:-1].split('\t')
12 |         eid[dub[0]] = dub[1]
13 | 
14 | 
15 | with open('type id file here','r') as t:
16 |     for line in t:
17 |         dub = line[:-1].split('\t')
18 |         tid[dub[0]] = dub[1]
19 | 
20 | 
21 | with open('predicate id file here','r') as p:
22 |     for line in p:
23 |         dub = line[:-1].split('\t')
24 |         pid[dub[0]] = dub[1]
25 | 
26 | print("%d %d %d"%(len(eid),len(tid),len(pid)))
27 | 
28 | rt = open("output triple file here",'w')
29 | with open('input triple file here','r') as f:
30 |     i = 1
31 |     for line in f:
32 |         tri = line[:-2].split('\t')
33 |         if tri[1] == '<type>':
34 |             if not tid.has_key(tri[2]):
35 |                 tid[tri[2]] = '-1'
36 |             try:
37 |                 rt.write("%s\t%s\t%s\n"%(eid[tri[0]],pid[tri[1]],tid[tri[2]]))
38 |             except KeyError:
39 |                 print(line)
40 |                 print(i)
41 |         else:
42 |             if tri[2][0]=='"':
43 |                 try:
44 |                     rt.write("%s\t%s\t-1\n"%(eid[tri[0]],pid[tri[1]]))
45 |                 except KeyError:
46 |                     print(line)
47 |                     print(i)
48 |             else:
49 |                 try:
50 |                     rt.write("%s\t%s\t%s\n"%(eid[tri[0]],pid[tri[1]],eid[tri[2]]))
51 |                 except KeyError:
52 |                     print(line)
53 |                     print(i)
54 | 
--------------------------------------------------------------------------------
/src/fgmt/VariableFragment.java:
--------------------------------------------------------------------------------
 1 | package fgmt;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.Collections;
 5 | import java.util.HashSet;
 6 | import java.util.Iterator;
 7 | 
 8 | public class VariableFragment extends Fragment {
 9 | 	public static final int magic_number = -265;
10 | 
11 | 	public ArrayList<HashSet<Integer>> candTypes = null;
12 | 	public HashSet<Integer> candEntities = null;
13 | 	public boolean mayLiteral = false;
14 | 
15 | 	public VariableFragment()
16 | 	{
17 | 		fragmentType = typeEnum.VAR_FRAGMENT;
18 | 		candTypes = new ArrayList<HashSet<Integer>>();
19 | 		candEntities = new HashSet<Integer>();
20 | 	}
21 | 
22 | 	@Override
23 | 	public String toString()
24 | 	{
25 | 		return "("+ candEntities.size() +")";
26 | 	}
27 | 
28 | 	public boolean containsAll(HashSet<Integer> s1) {
29 | 		Iterator<HashSet<Integer>> it = candTypes.iterator();
30 | 		while(it.hasNext()) {
31 | 			HashSet<Integer> s2 = it.next();
32 | 			if (s2.contains(magic_number)) {
33 | 				if (!Collections.disjoint(s1, s2)) {
34 | 					return true;
35 | 				}
36 | 			}
37 | 			else {
38 | 				if (s1.containsAll(s2) && s2.containsAll(s1)) {
39 | 					return true;
40 | 				}
41 | 			}
42 | 		}
43 | 		return false;
44 | 	}
45 | 
46 | 	public boolean contains(Integer i) {
47 | 		Iterator<HashSet<Integer>> it = candTypes.iterator();
48 | 		while(it.hasNext()) {
49 | 			HashSet<Integer> s = it.next();
50 | 			if (s.contains(i)) {
51 | 				return true;
52 | 			}
53 | 		}
54 | 		return false;
55 | 	}
56 | }
--------------------------------------------------------------------------------
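VariableFragment.containsAll above applies two rules: a candidate type-set holding magic_number acts as a wildcard that matches on any overlap, while an ordinary set must equal the query set exactly. A toy illustration with made-up type ids:

import java.util.Arrays;
import java.util.HashSet;
import fgmt.VariableFragment;

public class VariableFragmentSketch {
    public static void main(String[] args) {
        VariableFragment v = new VariableFragment();
        v.candTypes.add(new HashSet<Integer>(Arrays.asList(VariableFragment.magic_number, 7)));
        v.candTypes.add(new HashSet<Integer>(Arrays.asList(3, 5)));

        System.out.println(v.containsAll(new HashSet<Integer>(Arrays.asList(7, 9)))); // true: overlaps the wildcard set
        System.out.println(v.containsAll(new HashSet<Integer>(Arrays.asList(3, 5)))); // true: equals {3, 5}
        System.out.println(v.containsAll(new HashSet<Integer>(Arrays.asList(3))));    // false: a proper subset is not enough
    }
}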
/src/rdf/TypeMapping.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import qa.Globals;
 4 | 
 5 | public class TypeMapping implements Comparable<TypeMapping>
 6 | {
 7 | 	public Integer typeID = null;
 8 | 	public String typeName = null;
 9 | 	public double score = 0;
10 | 
11 | 	/*
12 | 	 * 1, For a standard type (DBO type in DBpedia), relation = typePredicateID (rdf:type)
13 | 	 * 2, For a nonstandard type, typeID = -1
14 | 	 * 3, If the type should be added into the triples, a relation is needed | e.g., Which professional surfers were born in Australia? (?uri dbo:occupation res:Surfing) relation = dbo:occupation
15 | 	 * 4, If no type triple is needed, relation = -1 | e.g., Who was the father of [Queen] Elizabeth II
16 | 	 * */
17 | 	public int prefferdRelation = Globals.pd.typePredicateID;
18 | 
19 | 	public TypeMapping(Integer tid, String type, double sco)
20 | 	{
21 | 		typeID = tid;
22 | 		typeName = type;
23 | 		score = sco;
24 | 	}
25 | 
26 | 	public TypeMapping(Integer tid, String type, Integer relation, double sco)
27 | 	{
28 | 		typeID = tid;
29 | 		typeName = type.replace("_", "");
30 | 		score = sco;
31 | 		prefferdRelation = relation;
32 | 	}
33 | 
34 | 	// In descending order: big --> small
35 | 	public int compareTo(TypeMapping o)
36 | 	{
37 | 		double diff = this.score - o.score;
38 | 		if (diff > 0) return -1;
39 | 		else if (diff < 0) return 1;
40 | 		else return 0;
41 | 	}
42 | 
43 | 	public int hashCode()
44 | 	{
45 | 		return typeID.hashCode();
46 | 	}
47 | 
48 | 	public String toString()
49 | 	{
50 | 		StringBuilder res = new StringBuilder(typeName+"("+score+")");
51 | 		return res.toString();
52 | 	}
53 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2018,
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
--------------------------------------------------------------------------------
/src/lcn/Main.java:
--------------------------------------------------------------------------------
 1 | package lcn;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.Scanner;
 5 | 
 6 | import qa.Globals;
 7 | import qa.mapping.EntityFragmentDict;
 8 | 
 9 | 
10 | public class Main {
11 | 	//Test: searching Entities and Types through Lucene Index.
12 | 	public static void main(String[] aStrings) throws Exception{
13 | 
14 | 		//SearchInLiteralSubset se = new SearchInLiteralSubset();
15 | 		SearchInTypeShortName st = new SearchInTypeShortName();
16 | 		SearchInEntityFragments sf = new SearchInEntityFragments();
17 | 		EntityFragmentDict efd = new EntityFragmentDict();
18 | 		EntityFragmentFields eff = null;
19 | 		Globals.localPath = "D:/husen/gAnswer/";
20 | 		Scanner sc = new Scanner(System.in);
21 | 		System.out.print("input name: ");
22 | 
23 | 		while(sc.hasNextLine())
24 | 		{
25 | 			String literal = sc.nextLine();
26 | 			System.out.println(literal);
27 | 
28 | 			//literal = cnlp.getBaseFormOfPattern(literal);
29 | 
30 | 			//search Type
31 | 			ArrayList<String> result = st.searchType(literal, 0.4, 0.8, 10);
32 | 			System.out.println("TypeShortName-->RESULT:");
33 | 			for (String s : result) {
34 | 				System.out.println("<"+s + ">");
35 | 			}
36 | 
37 | 			//search Ent Fragment
38 | //			int eId = EntityFragmentFields.entityName2Id.get(literal);
39 | //			EntityFragment ef = EntityFragment.getEntityFragmentByEntityId(eId);
40 | //			System.out.println(ef);
41 | 
42 | 			//search Ent Name
43 | //			ArrayList<EntityNameAndScore> result = sf.searchName(literal, 0.4, 0.8, 50);
44 | //			System.out.println("EntityName-->RESULT:");
45 | //			for(EntityNameAndScore enas: result)
46 | //			{
47 | //				System.out.println(enas);
48 | //			}
49 | 
50 | 			System.out.print("input name: ");
51 | 		}
52 | 		sc.close();
53 | 	}
54 | 
55 | }
--------------------------------------------------------------------------------
/src/rdf/SemanticUnit.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.HashMap;
 5 | 
 6 | import rdf.SemanticRelation;
 7 | import nlp.ds.DependencyTreeNode;
 8 | import nlp.ds.Word;
 9 | 
10 | public class SemanticUnit
11 | {
12 | 	public Word centerWord = null;
13 | 	public ArrayList<DependencyTreeNode> describeNodeList = new ArrayList<DependencyTreeNode>();
14 | 	public ArrayList<SemanticUnit> neighborUnitList = new ArrayList<SemanticUnit>();
15 | 	public HashMap RelationList = new HashMap();
16 | 
17 | 	public boolean isSubj = true;
18 | 	public Integer prefferdType = null;
19 | 
20 | 	public SemanticUnit(Word center, boolean isSubJ)
21 | 	{
22 | 		centerWord = center;
23 | 		isSubj = isSubJ;
24 | 	}
25 | 
26 | 	public SemanticUnit copy()
27 | 	{
28 | 		SemanticUnit su = new SemanticUnit(this.centerWord, this.isSubj);
29 | 		su.describeNodeList = (ArrayList<DependencyTreeNode>) this.describeNodeList.clone();
30 | 		su.neighborUnitList = (ArrayList<SemanticUnit>) this.neighborUnitList.clone();
31 | 		su.RelationList = (HashMap) this.RelationList.clone();
32 | 		return su;
33 | 	}
34 | 
35 | 	@Override
36 | 	public int hashCode() {
37 | 		return centerWord.hashCode();
38 | 	}
39 | 
40 | 	@Override
41 | 	public boolean equals(Object o) {
42 | 		if (o instanceof SemanticUnit) {
43 | 			SemanticUnit su2 = (SemanticUnit) o;
44 | 			if(this.centerWord.equals(su2.centerWord))
45 | 				return true;
46 | 		}
47 | 		return false;
48 | 	}
49 | 
50 | 	@Override
51 | 	public String toString()
52 | 	{
53 | 		String ret = "<" + centerWord + ", {";
54 | 		for(SemanticUnit su: neighborUnitList)
55 | 			ret += su.centerWord + ", ";
56 | 		ret += "}>";
57 | 
58 | 		return ret;
59 | 
	}
60 | 
61 | }
--------------------------------------------------------------------------------
/src/rdf/ImplicitRelation.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import fgmt.TypeFragment;
 4 | import qa.Globals;
 5 | import lcn.EntityFragmentFields;
 6 | 
 7 | public class ImplicitRelation {
 8 | 
 9 | 	public String subj = null;
10 | 	public String obj = null;
11 | 
12 | 	public int pId = -1;
13 | 	public double score = 0;
14 | 
15 | 	//Role : 1|ent , 2|type_ , 3|var
16 | 	public enum roleEnum {ENTITY, TYPE_CONSTANT, TYPE_VARIABLE, VARIABLE};
17 | 	public int subjRole = -1;
18 | 	public int objRole = -1;
19 | 	public int subjId = -1;
20 | 	public int objId = -1;
21 | 
22 | 	public ImplicitRelation(String s, String o, int pid, double sc)
23 | 	{
24 | 		pId = pid;
25 | 		subj = s;
26 | 		obj = o;
27 | 		score = sc;
28 | 		subjId = EntityFragmentFields.entityName2Id.get(s);
29 | 		if(pId != Globals.pd.typePredicateID)
30 | 			objId = EntityFragmentFields.entityName2Id.get(o);
31 | 		else
32 | 			objId = TypeFragment.typeShortName2IdList.get(o).get(0);
33 | 	}
34 | 
35 | 	public ImplicitRelation(Integer sId, Integer oId, int pid, double sc)
36 | 	{
37 | 		pId = pid;
38 | 		subjId = sId;
39 | 		objId = oId;
40 | 		score = sc;
41 | 	}
42 | 
43 | 	public void setSubjectId(Integer s)
44 | 	{
45 | 		subjId = s;
46 | 	}
47 | 
48 | 	public void setObjectId(Integer o)
49 | 	{
50 | 		objId = o;
51 | 	}
52 | 
53 | 	public void setSubject(String s)
54 | 	{
55 | 		subj = s;
56 | 	}
57 | 
58 | 	public void setObject(String o)
59 | 	{
60 | 		obj = o;
61 | 	}
62 | 
63 | 	public int hashCode()
64 | 	{
65 | 		return new Integer(pId).hashCode() ^ new Integer(subjId).hashCode() ^ new Integer(objId).hashCode();
66 | 	}
67 | 
68 | 	@Override
69 | 	public boolean equals(Object ir)
70 | 	{
71 | 		ImplicitRelation tmpIr = (ImplicitRelation) ir;
72 | 		if (pId == tmpIr.pId && subjId == tmpIr.subjId && objId == tmpIr.objId)
73 | 			return true;
74 | 		else return false;
75 | 	}
76 | 
77 | }
--------------------------------------------------------------------------------
/src/nlp/tool/NERecognizer.java:
--------------------------------------------------------------------------------
 1 | package nlp.tool;
 2 | 
 3 | import java.util.List;
 4 | 
 5 | import qa.Globals;
 6 | 
 7 | import nlp.ds.Sentence;
 8 | import nlp.ds.Word;
 9 | 
10 | import edu.stanford.nlp.ie.AbstractSequenceClassifier;
11 | import edu.stanford.nlp.ie.crf.CRFClassifier;
12 | import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation;
13 | import edu.stanford.nlp.ling.CoreAnnotations.PositionAnnotation;
14 | import edu.stanford.nlp.ling.CoreLabel;
15 | 
16 | public class NERecognizer {
17 | 
18 | 	static String serializedClassifier;
19 | 	static AbstractSequenceClassifier<CoreLabel> classifier;
20 | 	//public static String localPath="E:\\Hanshuo\\gAnswer\\";
21 | 
22 | 	public NERecognizer() {
23 | 		serializedClassifier = Globals.localPath+"lib/stanford-ner-2012-11-11/classifiers/english.all.3class.distsim.crf.ser.gz";
24 | 		classifier = CRFClassifier.getClassifierNoExceptions(serializedClassifier);
25 | 	}
26 | 
27 | 	/*public NERecognizer(String basePath, boolean flag) {
28 | 		serializedClassifier = "WEB-INF\\lib\\stanford-ner-2012-11-11\\stanford-ner-2012-11-11\\classifiers\\english.all.3class.distsim.crf.ser.gz";
29 | 	}*/
30 | 
31 | 	public void recognize(Sentence sentence) {
32 | 		List<CoreLabel> lcl = classifier.classify(sentence.plainText).get(0);
33 | 		for (CoreLabel cl : lcl) {
34 | 			int position = Integer.parseInt(cl.get(PositionAnnotation.class))+1;
35 | 			Word w = sentence.getWordByIndex(position);
36 | 
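			// PositionAnnotation counts tokens from 0 but Word positions are 1-based
			// (Sentence.getWordByIndex reads words[idx-1]), hence the +1 above.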
			String ner = cl.get(AnswerAnnotation.class);
37 | 			if (ner.equals("O")) w.ner = null;
38 | 			else w.ner = ner;
39 | 		}
40 | 	}
41 | 
42 | 	public static void main(String[] args) {
43 | 		System.out.println("Test NER");
44 | 		Globals.init();
45 | 
46 | 		Sentence s = new Sentence("I go to school at Stanford University, which is located in California.");//"Which states of Germany are governed by the Social Democratic Party?"
47 | 		Globals.nerRecognizer.recognize(s);
48 | 		for (Word word : s.words) {
49 | 			System.out.print(word + " ");
50 | 			System.out.println("ner=" + word.ner);
51 | 		}
52 | 	}
53 | }
--------------------------------------------------------------------------------
/genrate_fragments/step1_clean_triple.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | '''
 3 | Step 1: Clean the triple file. In the DBpedia case, we just need the part of each resource URI that indicates the entity/type/predicate name.
 4 | '''
 5 | fileName = [] # list of triple files to be processed
 6 | notRdf = open('./notRdf.txt','w') # records lines that refer to a type, but not via rdf:type
 7 | for index2,fname in enumerate(fileName):
 8 |     f = open('./'+fname)
 9 |     triple = open('output triple files here','w')
10 |     prefix_f = open('output prefix files here','w') # save the prefixes in files, in case they are useful in the future.
11 |     i = 0
12 |     count = 0
13 |     prefix_set = set()
14 |     for line in f:
15 |         if line[0] != '<':
16 |             print(i)
17 |             i = i + 1
18 |             count += 1
19 |             continue
20 |         line = line[:-3].replace('> <','>$-$-$<').replace('> "','>$-$-$"')
21 |         line = line.split('$-$-$')
22 |         if i==0:
23 |             i += 1
24 |             continue
25 |         new_line=[]
26 |         if "type>" in line[1]:
27 |             if "rdf" not in line[1]:
28 |                 notRdf.write(str(line)+'\n')
29 |                 continue
30 |         for index,item in enumerate(line):
31 |             if not item:
32 |                 count +=1
33 |                 break
34 |             if item[0]=='<':
35 |                 pos = item.rfind('/')
36 |                 word = item[pos+1:-1].split("#")
37 |                 if len(word)<2:
38 |                     new_line.append('<'+word[0]+'>')
39 |                 else:
40 |                     new_line.append('<'+word[1]+'>')
41 |                 if index == 1:
42 |                     tmp = new_line[1][1:len(new_line[1])-1]
43 |                     pos2 = line[1].rfind(tmp)
44 |                     prefix = line[1][1:pos2-1]
45 |                     prefix_set.add(tmp + '^^^'+prefix+'\n')
46 |                 continue
47 |             elif item.count('"') >=2:
48 |                 item = item.split('^^')[0].split('@')[0]
49 |                 pattern = re.compile('"(.*)"')
50 |                 word = '"'+''.join(pattern.findall(item))+'"'
51 |                 new_line.append(word)
52 |                 continue
53 |             else:
54 |                 print(i)
55 |         i += 1
56 |         #print('\t'.join(new_line))
57 |         if i%1000000==0:
58 |             print("%d:%d"%(index2,i))
59 |         triple.write('\t'.join(new_line)+'\n')
60 |     for item in prefix_set:
61 |         prefix_f.write(item)
62 |     f.close()
63 |     triple.close()
64 |     prefix_f.close()
65 | 
--------------------------------------------------------------------------------
/src/nlp/tool/StanfordParser.java:
--------------------------------------------------------------------------------
 1 | package nlp.tool;
 2 | 
 3 | import java.io.StringReader;
 4 | import java.util.List;
 5 | 
 6 | import edu.stanford.nlp.ling.CoreLabel;
 7 | import edu.stanford.nlp.objectbank.TokenizerFactory;
 8 | import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
 9 | import edu.stanford.nlp.process.CoreLabelTokenFactory;
10 | import edu.stanford.nlp.process.PTBTokenizer;
11 | import edu.stanford.nlp.trees.GrammaticalStructure;
12 | import edu.stanford.nlp.trees.GrammaticalStructureFactory;
13 | import edu.stanford.nlp.trees.PennTreebankLanguagePack;
14 | import edu.stanford.nlp.trees.Tree;
15 | import 
edu.stanford.nlp.trees.TreebankLanguagePack;
16 | 
17 | public class StanfordParser {
18 | 	private LexicalizedParser lp;
19 | 	private TokenizerFactory<CoreLabel> tokenizerFactory;
20 | 	private TreebankLanguagePack tlp;
21 | 	private GrammaticalStructureFactory gsf;
22 | 
23 | 	public StanfordParser() {
24 | 		lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
25 | 		tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
26 | 		tlp = new PennTreebankLanguagePack();
27 | 		gsf = tlp.grammaticalStructureFactory();
28 | 	}
29 | 
30 | 	public GrammaticalStructure getGrammaticalStructure (String sentence) {
31 | 		List<CoreLabel> rawWords2 =
32 | 			tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
33 | 		// Converts a Sentence/List/String into a Tree.
34 | 		// In all circumstances, the input will be treated as a single sentence to be parsed.
35 | 		Tree parse = lp.apply(rawWords2);
36 | 
37 | 		return gsf.newGrammaticalStructure(parse);
38 | 		/*List tdl = gs.typedDependencies(false);
39 | 		for (TypedDependency td : tdl) {
40 | 			System.out.println(td.reln().getShortName()+"("+td.gov()+","+td.dep()+")");
41 | 			System.out.println("gov="+td.gov()
42 | 					+"\tgov.index="
43 | 					+td.gov().index()
44 | 					+"\tgov.value="
45 | 					+td.gov().value()
46 | 					+"\tgov.pos="
47 | 					+((TreeGraphNode)td.gov().parent()).value());
48 | 		}*/
49 | 		//System.out.println(tdl);
50 | 	}
51 | }
--------------------------------------------------------------------------------
/src/nlp/ds/Sentence.java:
--------------------------------------------------------------------------------
 1 | package nlp.ds;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.HashMap;
 5 | 
 6 | import qa.Globals;
 7 | import qa.Query;
 8 | import rdf.MergedWord;
 9 | 
10 | public class Sentence {
11 | 	public String plainText = null;
12 | 	public Word[] words = null;
13 | 	public HashMap<String, Word> map = null;
14 | 
15 | 	public DependencyTree dependencyTreeStanford = null;
16 | 	public DependencyTree dependencyTreeMalt = null;
17 | 
18 | 	public enum SentenceType {SpecialQuestion,GeneralQuestion,ImperativeSentence}
19 | 	public SentenceType sentenceType = SentenceType.SpecialQuestion;
20 | 
21 | 	public Sentence (String s)
22 | 	{
23 | 		plainText = s;
24 | 		words = Globals.coreNLP.getTaggedWords(plainText);
25 | 		map = new HashMap<String, Word>();
26 | 		for (Word w : words)
27 | 			map.put(w.key, w);
28 | 	}
29 | 
30 | 	public Sentence (Query query, String s)
31 | 	{
32 | 		plainText = s;
33 | 		words = Globals.coreNLP.getTaggedWords(plainText);
34 | 		// inherit NodeRecognition's information
35 | 		for(Word word: words)
36 | 		{
37 | 			for(MergedWord mWord: query.mWordList)
38 | 			{
39 | 				if(word.originalForm.equals(mWord.name))
40 | 				{
41 | 					word.mayLiteral = mWord.mayLiteral;
42 | 					word.mayEnt = mWord.mayEnt;
43 | 					word.mayType = mWord.mayType;
44 | 					word.mayCategory = mWord.mayCategory;
45 | 					word.tmList = mWord.tmList;
46 | 					word.emList = mWord.emList;
47 | 					word.category = mWord.category;
48 | 				}
49 | 			}
50 | 		}
51 | 		map = new HashMap<String, Word>();
52 | 		for (Word w : words)
53 | 			map.put(w.key, w);
54 | 	}
55 | 	public ArrayList<Word> getWordsByString (String w) {
56 | 		ArrayList<Word> ret = new ArrayList<Word>();
57 | 		for (Word wo: words) {
58 | 			if (wo.originalForm.equals(w)) ret.add(wo);
59 | 		}
60 | 		return ret;
61 | 	}
62 | 
63 | 	public Word getWordByIndex (int idx) {
64 | 		return words[idx-1];
65 | 	}
66 | 
67 | 	public Word getWordByKey (String k) {
68 | 		return map.get(k);
69 | 	}
70 | 
71 | 	public boolean hasModifier(Word w)
72 | 	{
73 | 		for(Word word: words)
74 | 			if(word!=w && word.modifiedWord==w)
75 | 				return true;
76 | 		return false;
77 | 	}
78 | 
79 | 	public void printNERResult () {
80 | 		for (Word word : words) {
81 | 			System.out.print(word + " ");
82 | 			System.out.println("ner=" + word.ner);
83 | 		}
84 | 	}
85 | }
86 | 
87 | 
88 | 
--------------------------------------------------------------------------------
/src/lcn/EntityFragmentFields.java:
--------------------------------------------------------------------------------
 1 | package lcn;
 2 | 
 3 | import java.io.BufferedReader;
 4 | import java.io.File;
 5 | import java.io.FileInputStream;
 6 | import java.io.IOException;
 7 | import java.io.InputStreamReader;
 8 | import java.util.HashMap;
 9 | 
10 | import qa.Globals;
11 | 
12 | public class EntityFragmentFields {
13 | 
14 | 	// entity dictionary
15 | 	public static HashMap<String, Integer> entityName2Id = null;
16 | 	public static HashMap<Integer, String> entityId2Name = null;
17 | 	public static HashMap<Integer, String> entityFragmentString = null;
18 | 
19 | 	public static void load() throws IOException
20 | 	{
21 | 		String filename = Globals.localPath+"data/DBpedia2016/fragments/id_mappings/16entity_id.txt";
22 | 		String fragmentFileName = Globals.localPath+"data/DBpedia2016/fragments/entity_RDF_fragment/16entity_fragment.txt";
23 | 		File file = new File(filename);
24 | 		BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file),"utf-8"));
25 | 
26 | 		entityName2Id = new HashMap<String, Integer>();
27 | 		entityId2Name = new HashMap<Integer, String>();
28 | 
29 | 		long t1, t2, t3;
30 | 
31 | 		t1 = System.currentTimeMillis();
32 | 		// load entity id
33 | 		System.out.println("Loading entity id ...");
34 | 		String line;
35 | 		while((line = br.readLine()) != null)
36 | 		{
37 | 			String[] lines = line.split("\t");
38 | 			String entName = lines[0].substring(1, lines[0].length()-1);
39 | 
40 | 			entityName2Id.put(entName, Integer.parseInt(lines[1]));
41 | 			entityId2Name.put(Integer.parseInt(lines[1]), entName);
42 | 		}
43 | 		br.close();
44 | 		t2 = System.currentTimeMillis();
45 | 		System.out.println("Load "+entityId2Name.size()+" entity ids in "+ (t2-t1) + "ms.");
46 | 
47 | 		// load entity fragment
48 | 		System.out.println("Loading entity fragments ...");
49 | 		br = new BufferedReader(new InputStreamReader(new FileInputStream(fragmentFileName),"utf-8"));
50 | 		entityFragmentString = new HashMap<Integer, String>();
51 | 		while((line = br.readLine()) != null)
52 | 		{
53 | 			String[] lines = line.split("\t");
54 | 			if(lines.length != 2)
55 | 				continue;
56 | 			int eId = Integer.parseInt(lines[0]);
57 | 			entityFragmentString.put(eId, lines[1]);
58 | 		}
59 | 		t3 = System.currentTimeMillis();
60 | 		System.out.println("Load "+entityFragmentString.size()+" entity fragments in "+ (t3-t2) + "ms.");
61 | 
62 | 		br.close();
63 | 	}
64 | }
--------------------------------------------------------------------------------
/src/nlp/tool/MaltParser.java:
--------------------------------------------------------------------------------
 1 | package nlp.tool;
 2 | 
 3 | 
 4 | import nlp.ds.Sentence;
 5 | import nlp.ds.Word;
 6 | 
 7 | import org.maltparser.MaltParserService;
 8 | import org.maltparser.core.exception.MaltChainedException;
 9 | import org.maltparser.core.syntaxgraph.DependencyStructure;
10 | 
11 | import qa.Globals;
12 | 
13 | public class MaltParser {
14 | 	private MaltParserService service = null;
15 | 	public MaltParser() {
16 | 		try
17 | 		{
18 | 			System.out.print("Loading MaltParser ...");
19 | 			service = new MaltParserService();
20 | 			// Initialize the parser model 'model0', set the working directory to '.' and set the logging file to 'parser.log'
21 | 			//service.initializeParserModel("-c engmalt.linear-1.7 -m parse -w . 
-lfi parser.log"); 22 | service.initializeParserModel("-c engmalt.linear-1.7 -m parse -w "+Globals.localPath+"lib/maltparser-1.9.1 -lfi parser.log"); 23 | firstParse(); 24 | System.out.println("ok!"); 25 | } catch (MaltChainedException e) { 26 | e.printStackTrace(); 27 | System.err.println("MaltParser exception: " + e.getMessage()); 28 | } 29 | } 30 | 31 | private void firstParse() { 32 | String[] tokens = new String[12]; 33 | tokens[0] = "1\tIn\t_\tIN\tIN\t_"; 34 | tokens[1] = "2\twhich\t_\tWDT\tWDT\t_"; 35 | tokens[2] = "3\tmovies\t_\tNNS\tNNS\t_"; 36 | tokens[3] = "4\tdirected\t_\tVBN\tVBN\t_"; 37 | tokens[4] = "5\tby\t_\tIN\tIN\t_"; 38 | tokens[5] = "6\tGarry\t_\tNNP\tNNP\t_"; 39 | tokens[6] = "7\tMarshall\t_\tNNP\tNNP\t_"; 40 | tokens[7] = "8\twas\t_\tVBD\tVBD\t_"; 41 | tokens[8] = "9\tJulia\t_\tNNP\tNNP\t_"; 42 | tokens[9] = "10\tRoberts\t_\tNNP\tNNP\t_"; 43 | tokens[10] = "11\tstarring\t_\tVBG\tVBG\t_"; 44 | tokens[11] = "12\t?\t_\t.\t.\t_"; 45 | try { 46 | service.parse(tokens); 47 | } catch (MaltChainedException e) { 48 | e.printStackTrace(); 49 | } 50 | } 51 | 52 | public DependencyStructure getDependencyStructure (Sentence sentence) { 53 | try { 54 | return service.parse(getTaggedTokens(sentence)); 55 | } catch (MaltChainedException e) { 56 | e.printStackTrace(); 57 | } 58 | return null; 59 | } 60 | 61 | private String[] getTaggedTokens (Sentence sentence) { 62 | String[] ret = new String[sentence.words.length]; 63 | int count = 0; 64 | for (Word w : sentence.words) { 65 | ret[count] = new String(""+w.position+"\t"+w.originalForm+"\t_\t"+w.posTag+"\t"+w.posTag+"\t_"); 66 | count ++; 67 | } 68 | return ret; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/nlp/tool/MaltParserCon.java: -------------------------------------------------------------------------------- 1 | package nlp.tool; 2 | 3 | import java.io.File; 4 | import java.net.URL; 5 | 6 | import nlp.ds.Sentence; 7 | import nlp.ds.Word; 8 | 9 | import org.maltparser.concurrent.ConcurrentMaltParserModel; 10 | import org.maltparser.concurrent.ConcurrentMaltParserService; 11 | import org.maltparser.concurrent.graph.ConcurrentDependencyGraph; 12 | import org.maltparser.core.exception.MaltChainedException; 13 | //import org.maltparser.core.syntaxgraph.DependencyStructure; 14 | 15 | 16 | public class MaltParserCon { 17 | private ConcurrentMaltParserModel model = null; 18 | public ConcurrentDependencyGraph outputGraph = null; 19 | 20 | public MaltParserCon(){ 21 | try{ 22 | System.out.println("Loading Maltparser...\n"); 23 | URL ModelURL = new File("output/engmalt.linear-1.7.mco").toURI().toURL(); 24 | model = ConcurrentMaltParserService.initializeParserModel(ModelURL); 25 | firstTest(); 26 | System.out.println("ok!\n"); 27 | }catch(Exception e){ 28 | e.printStackTrace(); 29 | System.err.println("MaltParser exception: " + e.getMessage()); 30 | } 31 | } 32 | 33 | private void firstTest(){ 34 | String[] tokens = new String[12]; 35 | tokens[0] = "1\tIn\t_\tIN\tIN\t_"; 36 | tokens[1] = "2\twhich\t_\tWDT\tWDT\t_"; 37 | tokens[2] = "3\tmovies\t_\tNNS\tNNS\t_"; 38 | tokens[3] = "4\tdirected\t_\tVBN\tVBN\t_"; 39 | tokens[4] = "5\tby\t_\tIN\tIN\t_"; 40 | tokens[5] = "6\tGarry\t_\tNNP\tNNP\t_"; 41 | tokens[6] = "7\tMarshall\t_\tNNP\tNNP\t_"; 42 | tokens[7] = "8\twas\t_\tVBD\tVBD\t_"; 43 | tokens[8] = "9\tJulia\t_\tNNP\tNNP\t_"; 44 | tokens[9] = "10\tRoberts\t_\tNNP\tNNP\t_"; 45 | tokens[10] = "11\tstarring\t_\tVBG\tVBG\t_"; 46 | tokens[11] = "12\t?\t_\t.\t.\t_"; 47 | try { 48 | 
			outputGraph = model.parse(tokens);
49 | 		} catch (Exception e) {
50 | 			e.printStackTrace();
51 | 		}
52 | 		System.out.println(outputGraph);
53 | 	}
54 | 
55 | 	public ConcurrentDependencyGraph getDependencyStructure (Sentence sentence) {
56 | 		try {
57 | 			return model.parse(getTaggedTokens(sentence));
58 | 		} catch (MaltChainedException e) {
59 | 			e.printStackTrace();
60 | 		}
61 | 		return null;
62 | 	}
63 | 
64 | 	private String[] getTaggedTokens (Sentence sentence) {
65 | 		String[] ret = new String[sentence.words.length];
66 | 		int count = 0;
67 | 		for (Word w : sentence.words) {
68 | 			ret[count] = new String(""+w.position+"\t"+w.originalForm+"\t_\t"+w.posTag+"\t"+w.posTag+"\t_");
69 | 			count ++;
70 | 		}
71 | 		return ret;
72 | 	}
73 | }
--------------------------------------------------------------------------------
/src/application/GinfoHandler.java:
--------------------------------------------------------------------------------
 1 | package application;
 2 | import java.io.IOException;
 3 | 
 4 | import javax.servlet.ServletException;
 5 | import javax.servlet.http.HttpServletRequest;
 6 | import javax.servlet.http.HttpServletResponse;
 7 | 
 8 | import log.QueryLogger;
 9 | 
10 | import org.json.*;
11 | import org.eclipse.jetty.server.Request;
12 | import org.eclipse.jetty.server.handler.AbstractHandler;
13 | 
14 | import qa.Globals;
15 | 
16 | public class GinfoHandler extends AbstractHandler{
17 | 
18 | 	public static String errorHandle(String status,String message,String question,QueryLogger qlog){
19 | 		JSONObject exobj = new JSONObject();
20 | 		try {
21 | 			exobj.put("status", status);
22 | 			exobj.put("message", message);
23 | 			exobj.put("query", question);
24 | 			if(qlog!=null&&qlog.rankedSparqls!=null&&qlog.rankedSparqls.size()>0){
25 | 				exobj.put("sparql", qlog.rankedSparqls.get(0).toStringForGStore2());
26 | 			}
27 | 		} catch (Exception e1) {
28 | 		}
29 | 		return exobj.toString();
30 | 	}
31 | 
32 | 	public void handle(String target, Request baseRequest, HttpServletRequest request, HttpServletResponse response)
33 | 			throws IOException, ServletException {
34 | 		try{
35 | 			response.setContentType("text/html;charset=utf-8");
36 | 			response.setStatus(HttpServletResponse.SC_OK);
37 | 			JSONObject infoobj = new JSONObject();
38 | 
39 | 			infoobj.put("version", Globals.Version);
40 | 			infoobj.put("dataset", Globals.Dataset);
41 | 			infoobj.put("GDB system", Globals.GDBsystem);
42 | 
43 | 			//TODO add more info
44 | 			baseRequest.setHandled(true);
45 | 			response.getWriter().println(infoobj.toString());
46 | 		}
47 | 		catch(Exception e){
48 | 			if(e instanceof IOException){
49 | 				try {
50 | 					baseRequest.setHandled(true);
51 | 					response.getWriter().println(errorHandle("500","IOException","",null));
52 | 				} catch (Exception e1) {
53 | 				}
54 | 			}
55 | 			else if(e instanceof JSONException){
56 | 				try {
57 | 					baseRequest.setHandled(true);
58 | 					response.getWriter().println(errorHandle("500","JSONException","",null));
59 | 				} catch (Exception e1) {
60 | 				}
61 | 			}
62 | 			else if(e instanceof ServletException){
63 | 				try {
64 | 					baseRequest.setHandled(true);
65 | 					response.getWriter().println(errorHandle("500","ServletException","",null));
66 | 				} catch (Exception e1) {
67 | 				}
68 | 			}
69 | 			else {
70 | 				try {
71 | 					baseRequest.setHandled(true);
72 | 					response.getWriter().println(errorHandle("500","Unknown Exception","",null));
73 | 				} catch (Exception e1) {
74 | 				}
75 | 			}
76 | 		}
77 | 	}
78 | 
79 | }
--------------------------------------------------------------------------------
/src/lcn/SearchInEntityFragments.java: 
--------------------------------------------------------------------------------
 1 | package lcn;
 2 | 
 3 | import java.io.IOException;
 4 | import java.util.ArrayList;
 5 | 
 6 | import org.apache.lucene.analysis.Analyzer;
 7 | import org.apache.lucene.analysis.standard.StandardAnalyzer;
 8 | import org.apache.lucene.queryParser.ParseException;
 9 | import org.apache.lucene.queryParser.QueryParser;
10 | import org.apache.lucene.search.Hits;
11 | import org.apache.lucene.search.IndexSearcher;
12 | import org.apache.lucene.search.Query;
13 | 
14 | import qa.Globals;
15 | 
16 | 
17 | public class SearchInEntityFragments {
18 | 
19 | 	/*
20 | 	 * Search entity in Lucene
21 | 	 * */
22 | 	public ArrayList<EntityNameAndScore> searchName(String literal, double thres1, double thres2, int k) throws IOException {
23 | 		Hits hits = null;
24 | 		String queryString = null;
25 | 		Query query = null;
26 | 
27 | 		IndexSearcher searcher = new IndexSearcher(Globals.localPath+"data/DBpedia2016/lucene/entity_fragment_index");
28 | 
29 | 		ArrayList<EntityNameAndScore> result = new ArrayList<EntityNameAndScore>();
30 | 
31 | 		queryString = literal;
32 | 
33 | 		Analyzer analyzer = new StandardAnalyzer();
34 | 		try
35 | 		{
36 | 			QueryParser qp = new QueryParser("EntityName", analyzer);
37 | 			query = qp.parse(queryString);
38 | 		} catch (ParseException e)
39 | 		{
40 | 			e.printStackTrace();
41 | 		}
42 | 
43 | 		if (searcher != null)
44 | 		{
45 | 			hits = searcher.search(query);
46 | 			//System.out.println("search for entity fragment hits.length=" + hits.length());
47 | 			if (hits.length() > 0)
48 | 			{
49 | 				//System.out.println("find " + hits.length() + " result!");
50 | 				for (int i=0; i<hits.length(); i++) {
51 | 					//System.out.println("No." + i + ": <" + hits.doc(i).get("EntityName") + ">;"
52 | 					//		+hits.doc(i).get("EntityFragment")
53 | 					//		+ "; Score: " + hits.score(i)
54 | 					//		+ "; Score2: " + hits.score(i)*(literalLength/hits.doc(i).get("EntityName").length()));
55 | 					if(i<k) {
56 | 						if (hits.score(i) >= thres1) {
57 | 							String en = hits.doc(i).get("EntityName");
58 | 							int id = Integer.parseInt(hits.doc(i).get("EntityId"));
59 | 							result.add(new EntityNameAndScore(id, en, hits.score(i)));
60 | 						}
61 | 						else {
62 | 							break;
63 | 						}
64 | 					}
65 | 					else {
66 | 						if (hits.score(i) >= thres2) {
67 | 							String en = hits.doc(i).get("EntityName");
68 | 							int id = Integer.parseInt(hits.doc(i).get("EntityId"));
69 | 							result.add(new EntityNameAndScore(id, en, hits.score(i)));
70 | 						}
71 | 						else {
72 | 							break;
73 | 						}
74 | 					}
75 | 				}
76 | 			}
77 | 		}
78 | 
79 | 		//Collections.sort(result);
80 | 		return result;
81 | 
82 | 	}
83 | 
84 | }
--------------------------------------------------------------------------------
/src/application/GanswerHttp.java:
--------------------------------------------------------------------------------
 1 | package application;
 2 | import org.eclipse.jetty.server.Server;
 3 | import org.eclipse.jetty.server.handler.ContextHandler;
 4 | import org.eclipse.jetty.server.handler.ContextHandlerCollection;
 5 | import org.eclipse.jetty.server.handler.ErrorHandler;
 6 | import org.eclipse.jetty.server.Handler;
 7 | 
 8 | import qa.Globals;
 9 | 
10 | public class GanswerHttp {
11 | 	static int maxAnswerNum = 100;
12 | 	static int maxSparqlNum = 3;
13 | 	static int defaultPort = 9999;
14 | 	public static void main(String[] args) throws Exception {
15 | 		//step 1: initialize the server with a given port
16 | 		if(args.length>0){
17 | 			for(int k=0;k
--------------------------------------------------------------------------------
/src/utils/FileUtil.java:
--------------------------------------------------------------------------------
 1 | package utils;
 2 | 
 3 | import java.io.BufferedReader;
 4 | import java.io.BufferedWriter;
 5 | import java.io.FileReader;
 6 | import java.io.FileWriter;
 7 | import java.io.InputStream;
 8 | import java.io.InputStreamReader;
 9 | import java.util.ArrayList;
10 | import java.util.HashSet;
11 | import java.util.List;
12 | import java.util.Set;
13 | 
14 | public class FileUtil {
15 | 
16 | 	public static List<String> readFile(String filePath){
17 | 		List<String> lines = new ArrayList<String>();
18 | 		try {
19 | 			BufferedReader br = new BufferedReader(new FileReader(filePath));
20 | 			String line = null;
21 | 			while( (line = br.readLine()) != null ){
22 | 				lines.add(line);
23 | 			}
24 | 			br.close();
25 | 		}catch(Exception e){
26 | 			e.printStackTrace();
27 | 		}finally {
28 | 			return lines;
29 | 
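			// note: returning from this finally block always wins, so lines is handed back
			// even after a partial failure, and anything the catch above missed (e.g. an Error)
			// is silently discarded.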
		}
30 | 	}
31 | 
32 | 	public static Set<String> readFileAsSet(String filePath){
33 | 		Set<String> lines = new HashSet<String>();
34 | 		try {
35 | 			BufferedReader br = new BufferedReader(new FileReader(filePath));
36 | 			String line = null;
37 | 			while( (line = br.readLine()) != null ){
38 | 				lines.add(line);
39 | 			}
40 | 			br.close();
41 | 		}catch(Exception e){
42 | 			e.printStackTrace();
43 | 		}finally {
44 | 			return lines;
45 | 		}
46 | 	}
47 | 
48 | 	public static List<String> readFile(InputStream is){
49 | 		List<String> lines = new ArrayList<String>();
50 | 		try {
51 | 			BufferedReader br = new BufferedReader(new InputStreamReader(is));
52 | 			String line = null;
53 | 			while( (line = br.readLine()) != null ){
54 | 				lines.add(line);
55 | 			}
56 | 			br.close();
57 | 		}catch(Exception e){
58 | 			e.printStackTrace();
59 | 		}finally {
60 | 			return lines;
61 | 		}
62 | 	}
63 | 
64 | 	public static String readFileAsALine(InputStream is){
65 | 		List<String> lines = readFile(is);
66 | 		StringBuffer buffer = new StringBuffer();
67 | 		for(String line : lines){
68 | 			buffer.append(line);
69 | 		}
70 | 		return buffer.toString();
71 | 	}
72 | 
73 | 	public static void writeFile(List<String> lines, String filePath){
74 | 		try{
75 | 			BufferedWriter bw = new BufferedWriter(new FileWriter(filePath));
76 | 			for(String line : lines){
77 | 				bw.write(line+"\n");
78 | 			}
79 | 			bw.close();
80 | 		}catch(Exception e){
81 | 			e.printStackTrace();
82 | 		}
83 | 	}
84 | 
85 | 	public static void writeFile(List<String> lines, String filePath, boolean ifContinueWrite){
86 | 		try{
87 | 			BufferedWriter bw = new BufferedWriter(new FileWriter(filePath, ifContinueWrite));
88 | 			for(String line : lines){
89 | 				bw.write(line+"\n");
90 | 			}
91 | 			bw.close();
92 | 		}catch(Exception e){
93 | 			e.printStackTrace();
94 | 		}
95 | 	}
96 | }
--------------------------------------------------------------------------------
/src/qa/Answer.java:
--------------------------------------------------------------------------------
 1 | package qa;
 2 | 
 3 | import java.util.ArrayList;
 4 | 
 5 | 
 6 | public class Answer implements Comparable<Answer>{
 7 | 	public String questionFocusKey=null;
 8 | 	public String questionFocusValue=null;
 9 | 	public ArrayList<String> otherInformationKey = null;
10 | 	public ArrayList<String> otherInformationValue = null;
11 | 
12 | 	public Answer(String qf, String[] ans) {
13 | 		otherInformationKey = new ArrayList<String>();
14 | 		otherInformationValue = new ArrayList<String>();
15 | 		int p1, p2;
16 | 		for (String line : ans) {
17 | 			System.out.println("line=" + line);
18 | 			if (line.startsWith(qf)) {
19 | 				questionFocusKey = qf;
20 | 				p1 = line.indexOf('<');
21 | 				p2 = line.lastIndexOf('>');
22 | 				String value = null;
23 | 				if (p1 != -1 && p2 != -1) {
24 | 					value = line.substring(p1+1, p2);
25 | 				}
26 | 				else {
27 | 					p1 = line.indexOf('\"');
28 | 					p2 = line.lastIndexOf('\"');
29 | 					if(p1 != -1 && p2 != -1)
30 | 						value = line.substring(p1+1, p2);
31 | 					else
32 | 					{
33 | 						p1 = line.indexOf(':');
34 | 						value = line.substring(p1+1);
35 | 					}
36 | 				}
37 | 				questionFocusValue = value;
38 | 			}
39 | 			else {
40 | 
41 | 				p1 = line.indexOf(':');
42 | 				String key = line.substring(0, p1);
43 | 
44 | 				p1 = line.indexOf('<');
45 | 				p2 = line.lastIndexOf('>');
46 | 				String value = null;
47 | 				if (p1 != -1 && p2 != -1) {
48 | 					value = line.substring(p1+1, p2);
49 | 				}
50 | 				else {
51 | 					p1 = line.indexOf('\"');
52 | 					p2 = line.lastIndexOf('\"');
53 | 					if(p1 != -1 && p2 != -1)
54 | 						value = line.substring(p1+1, p2);
55 | 					else
56 | 					{
57 | 						p1 = line.indexOf(':');
58 | 						value = line.substring(p1+1);
59 | 					}
60 | 				}
61 | 
62 | 				otherInformationKey.add(key);
63 | 				otherInformationValue.add(value);
64 | 			}
65 | 		}
66 | 
67 | 		// Solve BUG: GStore returns messy code in 
questionFocusKey
68 | 		if (questionFocusKey==null || questionFocusValue==null)
69 | 		{
70 | 			questionFocusKey = qf;
71 | 			String line = ans[0];
72 | 			p1 = line.indexOf('<');
73 | 			p2 = line.lastIndexOf('>');
74 | 			String value = null;
75 | 			if (p1 != -1 && p2 != -1) {
76 | 				value = line.substring(p1+1, p2);
77 | 			}
78 | 			else {
79 | 				p1 = line.indexOf('\"');
80 | 				p2 = line.lastIndexOf('\"');
81 | 				if(p1 != -1 && p2 != -1)
82 | 					value = line.substring(p1+1, p2);
83 | 				else
84 | 				{
85 | 					p1 = line.indexOf(':');
86 | 					value = line.substring(p1+1);
87 | 				}
88 | 			}
89 | 			questionFocusValue = value;
90 | 			otherInformationKey.clear();
91 | 			otherInformationValue.clear();
92 | 		}
93 | 
94 | 		/*System.out.println("otherInformationKey.size=" + otherInformationKey.size());
95 | 		for (String k : otherInformationKey) {
96 | 			System.out.println("otherInfoKey = " + k);
97 | 		}*/
98 | 	}
99 | 
100 | 	public int compareTo (Answer p)
101 | 	{
102 | 		return questionFocusValue.compareTo(p.questionFocusValue);
103 | 	}
104 | 
105 | }
--------------------------------------------------------------------------------
/src/rdf/SimpleRelation.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.HashMap;
 5 | 
 6 | import paradict.PredicateIDAndSupport;
 7 | import qa.Globals;
 8 | 
 9 | import nlp.ds.DependencyTree;
10 | import nlp.ds.DependencyTreeNode;
11 | import nlp.ds.Word;
12 | 
13 | // allow repetition
14 | public class SimpleRelation {
15 | 	public Word arg1Word = null;
16 | 	public Word arg2Word = null;
17 | 	public String relationParaphrase = null;
18 | 	public double matchingScore = 0;
19 | 
20 | 	public Word arg1Word_beforeCRR = null;
21 | 	public Word arg2Word_beforeCRR = null;
22 | 
23 | 	public HashMap<Integer, Double> pasList = new HashMap<Integer, Double>();
24 | 
25 | 	public Word preferredSubj = null;
26 | 
27 | 	public char extractingMethod = ' '; // S: StanfordParser; M: MaltParser; N: N-gram; R: rules
28 | 
29 | 	public SimpleRelation()
30 | 	{
31 | 
32 | 	}
33 | 
34 | 	public SimpleRelation(SimpleRelation sr)
35 | 	{
36 | 		arg1Word = sr.arg1Word;
37 | 		arg2Word = sr.arg2Word;
38 | 		relationParaphrase = sr.relationParaphrase;
39 | 		matchingScore = sr.matchingScore;
40 | 		arg1Word_beforeCRR = sr.arg1Word_beforeCRR;
41 | 		arg2Word_beforeCRR = sr.arg2Word_beforeCRR;
42 | 		pasList = sr.pasList;
43 | 		preferredSubj = sr.preferredSubj;
44 | 		extractingMethod = 'R';
45 | 	}
46 | 
47 | 	@Override
48 | 	public String toString() {
49 | 		return arg1Word.originalForm + "," + arg2Word.originalForm + "," + relationParaphrase + "," + matchingScore + "["+extractingMethod+"]";
50 | 		//return arg1Word.getFullEntityName() + "," + arg2Word.getFullEntityName() + "," + relationParaphrase + "," + matchingScore + "["+extractingMethod+"]";
51 | 	}
52 | 
53 | 	public int getHashCode() {
54 | 		return arg1Word.hashCode() ^ arg2Word.hashCode();
55 | 	}
56 | 
57 | 	public void setPasList (String pattern, double matchingScore, boolean[] matchedFlag) {
58 | 		ArrayList<PredicateIDAndSupport> list = Globals.pd.nlPattern_2_predicateList.get(pattern);
59 | 		for (PredicateIDAndSupport pidsup : list) {
60 | 			double sumSelectivity = 0;
61 | 			for (int i = 0; i < matchedFlag.length; i ++) {
62 | 				if (matchedFlag[i]) {
63 | 					sumSelectivity += pidsup.wordSelectivity[i];
64 | 				}
65 | 			}
66 | 			sumSelectivity = matchingScore*sumSelectivity*pidsup.support;
67 | 			int pid = pidsup.predicateID;
68 | 			if (Globals.pd.dbo_predicate_id.contains(pid)) sumSelectivity *= 1.5;
69 | 
70 | 			if (!pasList.containsKey(pid))
71 | 				pasList.put(pid, sumSelectivity);
72 | 			else if (sumSelectivity > pasList.get(pid))
73 | 				pasList.put(pid, sumSelectivity);
74 | 		}
75 | 	}
76 | 
77 | 	public void setPreferedSubjObjOrder(DependencyTree tree) {
78 | 		DependencyTreeNode n1 = tree.getNodeByIndex(this.arg1Word.position).getNNTopTreeNode(tree);
79 | 		DependencyTreeNode n2 = tree.getNodeByIndex(this.arg2Word.position).getNNTopTreeNode(tree);
80 | 		if (n1.father != null && n1.father.word.baseForm.equals("of") && n1.dep_father2child.equals("pobj")) {
81 | 			this.preferredSubj = this.arg1Word;
82 | 		}
83 | 		else if (n2.father != null && n2.father.word.baseForm.equals("of") && n2.dep_father2child.equals("pobj")) {
84 | 			this.preferredSubj = this.arg2Word;
85 | 		}
86 | 	}
87 | 
88 | }
--------------------------------------------------------------------------------
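For concreteness, setPasList in SimpleRelation.java above scores a candidate predicate as matchingScore × (sum of wordSelectivity over the matched pattern words) × support, times 1.5 when it is a dbo predicate: with matchingScore 0.8, matched selectivities 0.5 and 0.3, and support 10, a dbo predicate scores 0.8 × 0.8 × 10 × 1.5 = 9.6, and pasList keeps only the best score per predicate id.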
pasList.get(pid)) 73 | pasList.put(pid, sumSelectivity); 74 | } 75 | } 76 | 77 | public void setPreferedSubjObjOrder(DependencyTree tree) { 78 | DependencyTreeNode n1 = tree.getNodeByIndex(this.arg1Word.position).getNNTopTreeNode(tree); 79 | DependencyTreeNode n2 = tree.getNodeByIndex(this.arg2Word.position).getNNTopTreeNode(tree); 80 | if (n1.father != null && n1.father.word.baseForm.equals("of") && n1.dep_father2child.equals("pobj")) { 81 | this.preferredSubj = this.arg1Word; 82 | } 83 | else if (n2.father != null && n2.father.word.baseForm.equals("of") && n2.dep_father2child.equals("pobj")) { 84 | this.preferredSubj = this.arg2Word; 85 | } 86 | } 87 | 88 | } 89 | -------------------------------------------------------------------------------- /genrate_fragments/step5_get_entity_fragment.py: -------------------------------------------------------------------------------- 1 | #encoding=utf-8 2 | inEnEdge = {} 3 | outEnEdge = {} 4 | inEdge={} 5 | outEdge = {} 6 | types = {} 7 | with open('triple file represented by ids here','r') as f: 8 | i = 1 9 | for line in f: 10 | tri = line[:-1].split('\t') 11 | 12 | if tri[1] == 'id of ' and tri[2]!='-1': 13 | if types.has_key(tri[0]): 14 | types[tri[0]].add(tri[2]) 15 | else: 16 | types[tri[0]] = set() 17 | types[tri[0]].add(tri[2]) 18 | else: 19 | if outEdge.has_key(tri[0]): 20 | outEdge[tri[0]].add(tri[1]) 21 | else: 22 | outEdge[tri[0]] = set() 23 | outEdge[tri[0]].add(tri[1]) 24 | 25 | if tri[2]!='-1': 26 | if outEnEdge.has_key(tri[0]): 27 | if outEnEdge[tri[0]].has_key(tri[2]): 28 | outEnEdge[tri[0]][tri[2]].add(tri[1]) 29 | else: 30 | outEnEdge[tri[0]][tri[2]] = set() 31 | outEnEdge[tri[0]][tri[2]].add(tri[1]) 32 | else: 33 | outEnEdge[tri[0]]={} 34 | outEnEdge[tri[0]][tri[2]] = set() 35 | outEnEdge[tri[0]][tri[2]].add(tri[1]) 36 | 37 | if inEdge.has_key(tri[2]): 38 | inEdge[tri[2]].add(tri[1]) 39 | else: 40 | inEdge[tri[2]] = set() 41 | inEdge[tri[2]].add(tri[1]) 42 | if inEnEdge.has_key(tri[2]): 43 | if inEnEdge[tri[2]].has_key(tri[0]): 44 | inEnEdge[tri[2]][tri[0]].add(tri[1]) 45 | else: 46 | inEnEdge[tri[2]][tri[0]] = set() 47 | inEnEdge[tri[2]][tri[0]].add(tri[1]) 48 | else: 49 | inEnEdge[tri[2]] = {} 50 | inEnEdge[tri[2]][tri[0]] = set() 51 | inEnEdge[tri[2]][tri[0]].add(tri[1]) 52 | if i%10000 == 0: 53 | print(i) 54 | i += 1 55 | print(len(inEnEdge)) 56 | print(len(outEnEdge)) 57 | print(len(inEdge)) 58 | print(len(outEdge)) 59 | print(len(types)) 60 | wr = open('output fragment file','w') 61 | for i in range(12301050):#here we should iterate every entitiy 62 | if i%10000 == 0: 63 | print(i) 64 | eid = "%d"%i 65 | ret = "" 66 | tmp = "" 67 | if inEnEdge.has_key(eid): 68 | tmp = "" 69 | for k in inEnEdge[eid].keys(): 70 | tmp += k 71 | tmp += ':' 72 | for item in inEnEdge[eid][k]: 73 | if item == '-1': 74 | continue 75 | tmp += item + ';' 76 | tmp += ',' 77 | ret += tmp 78 | tmp = "" 79 | ret += '|' 80 | 81 | if outEnEdge.has_key(eid): 82 | tmp = "" 83 | for k in outEnEdge[eid].keys(): 84 | tmp += k 85 | tmp += ':' 86 | for item in outEnEdge[eid][k]: 87 | if item == '-1': 88 | continue 89 | tmp += item + ';' 90 | tmp += ',' 91 | ret += tmp 92 | tmp = "" 93 | ret += '|' 94 | 95 | if inEdge.has_key(eid): 96 | tmp = "" 97 | for item in inEdge[eid]: 98 | if item == '-1': 99 | continue 100 | tmp += item + ',' 101 | ret += tmp 102 | tmp="" 103 | ret += '|' 104 | 105 | if outEdge.has_key(eid): 106 | tmp = "" 107 | for item in outEdge[eid]: 108 | if item == '-1': 109 | continue 110 | tmp += item + ',' 111 | ret += tmp 112 | tmp="" 113 | ret += 
'|' 114 | 115 | if types.has_key(eid): 116 | tmp = "" 117 | for item in types[eid]: 118 | if item == '-1': 119 | continue 120 | tmp += item + ',' 121 | ret += tmp 122 | tmp="" 123 | wr.write("%s\t%s\n"%(eid,ret)) 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /src/fgmt/RelationFragment.java: -------------------------------------------------------------------------------- 1 | package fgmt; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.List; 8 | 9 | import qa.Globals; 10 | import utils.FileUtil; 11 | 12 | public class RelationFragment extends Fragment 13 | { 14 | public static HashMap> relFragments = null; 15 | public static HashMap> relationShortName2IdList = null; 16 | public static HashSet literalRelationSet = null; 17 | 18 | public HashSet inTypes = new HashSet(); 19 | public HashSet outTypes = new HashSet(); 20 | 21 | public static final int literalTypeId = -176; 22 | 23 | public RelationFragment(String inFgmt, String outFgmt, int fid) 24 | { 25 | fragmentId = fid; 26 | fragmentType = typeEnum.RELATION_FRAGMENT; 27 | String[] nums; 28 | 29 | // in 30 | nums = inFgmt.split(","); 31 | for(String s: nums) 32 | if(s.length() > 0) 33 | inTypes.add(Integer.parseInt(s)); 34 | 35 | // out 36 | if(outFgmt.equals("itera")) 37 | outTypes.add(literalTypeId); 38 | else 39 | { 40 | nums = outFgmt.split(","); 41 | for(String s: nums) 42 | if(s.length() > 0) 43 | outTypes.add(Integer.parseInt(s)); 44 | } 45 | } 46 | 47 | public static void load() throws Exception 48 | { 49 | String filename = Globals.localPath + "data/DBpedia2016/fragments/predicate_RDF_fragment/predicate_fragment.txt"; 50 | List inputs = FileUtil.readFile(filename); 51 | relFragments = new HashMap>(); 52 | literalRelationSet = new HashSet(); 53 | 54 | for(String line: inputs) 55 | { 56 | String[] lines = line.split("\t"); 57 | String inString = lines[0].substring(1, lines[0].length()-1); 58 | int pid = Integer.parseInt(lines[1]); 59 | String outString = lines[2].substring(1, lines[2].length()-1); 60 | 61 | // Record which relations can connect LITERAL objects. 
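// Illustrative sketch of one input line (the bracket characters are an
// assumption; only the substring(1, length()-1) stripping above is certain):
//   "[101,102]\t2001\t[305]" -> predicate 2001 links subject types {101,102}
//   to object type {305}, while an object field of "literal" is reduced to
//   "itera" by the same stripping and marks the predicate as literal-valued.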
62 | if(outString.equals("itera")) // "literal".substring(1, length()-1) 63 | literalRelationSet.add(pid); 64 | 65 | if(!relFragments.containsKey(pid)) 66 | relFragments.put(pid, new ArrayList()); 67 | relFragments.get(pid).add(new RelationFragment(inString, outString, pid)); 68 | } 69 | 70 | loadId(); 71 | } 72 | 73 | public static void loadId() throws IOException 74 | { 75 | String filename = Globals.localPath + "data/DBpedia2016/fragments/id_mappings/16predicate_id.txt"; 76 | List inputs = FileUtil.readFile(filename); 77 | relationShortName2IdList = new HashMap>(); 78 | 79 | for(String line: inputs) 80 | { 81 | String[] lines = line.split("\t"); 82 | String rlnShortName = lines[0]; 83 | 84 | if (!relationShortName2IdList.containsKey(rlnShortName)) 85 | relationShortName2IdList.put(rlnShortName, new ArrayList()); 86 | relationShortName2IdList.get(rlnShortName).add(Integer.parseInt(lines[1])); 87 | } 88 | } 89 | 90 | public static boolean isLiteral (String p) 91 | { 92 | for (Integer i : relationShortName2IdList.get(p)) 93 | if (literalRelationSet.contains(i)) 94 | return true; 95 | return false; 96 | } 97 | 98 | public static boolean isLiteral (int pid) 99 | { 100 | if (literalRelationSet.contains(pid)) 101 | return true; 102 | else 103 | return false; 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/lcn/BuildIndexForTypeShortName.java: -------------------------------------------------------------------------------- 1 | package lcn; 2 | 3 | import java.io.File; 4 | import java.util.ArrayList; 5 | import java.util.Date; 6 | import java.util.HashMap; 7 | import java.util.Iterator; 8 | 9 | import org.apache.lucene.analysis.Analyzer; 10 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 11 | import org.apache.lucene.document.Document; 12 | import org.apache.lucene.document.Field; 13 | import org.apache.lucene.index.IndexWriter; 14 | 15 | import qa.Globals; 16 | import fgmt.TypeFragment; 17 | 18 | public class BuildIndexForTypeShortName { 19 | public static void buildIndex(HashMap> typeShortName2IdList) throws Exception 20 | { 21 | long startTime = new Date().getTime(); 22 | File indexDir_li = new File("D:/husen/gAnswer/data/DBpedia2016/lucene/type_fragment_index"); 23 | 24 | Analyzer luceneAnalyzer_li = new StandardAnalyzer(); 25 | IndexWriter indexWriter_li = new IndexWriter(indexDir_li, luceneAnalyzer_li,true); 26 | 27 | int mergeFactor = 100000; 28 | int maxBufferedDoc = 1000; 29 | int maxMergeDoc = Integer.MAX_VALUE; 30 | 31 | //indexWriter.DEFAULT_MERGE_FACTOR = mergeFactor; 32 | indexWriter_li.setMergeFactor(mergeFactor); 33 | indexWriter_li.setMaxBufferedDocs(maxBufferedDoc); 34 | indexWriter_li.setMaxMergeDocs(maxMergeDoc); 35 | 36 | int count = 0; 37 | Iterator it = typeShortName2IdList.keySet().iterator(); 38 | while (it.hasNext()) 39 | { 40 | String sn = it.next(); 41 | if (sn.length() == 0) { 42 | continue; 43 | } 44 | 45 | count ++; 46 | 47 | StringBuilder splittedSn = new StringBuilder(""); 48 | 49 | if(sn.contains("_")) 50 | { 51 | String nsn = sn.replace("_", " "); 52 | splittedSn.append(nsn.toLowerCase()); 53 | } 54 | else 55 | { 56 | int last = 0, i = 0; 57 | for(i = 0; i < sn.length(); i ++) 58 | { 59 | // if it were not a small letter, then break it. 
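// (i.e. split the short name before every character that is not a lowercase
// letter) Illustrative example with an assumed input: the type name
// "BasketballPlayer" is indexed as "basketball player", which the
// StandardAnalyzer then tokenizes into two searchable Lucene terms.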
60 | if(!(sn.charAt(i)>='a' && sn.charAt(i)<='z')) 61 | { 62 | splittedSn.append(sn.substring(last, i).toLowerCase()); 63 | splittedSn.append(' '); 64 | last = i; 65 | } 66 | } 67 | splittedSn.append(sn.substring(last, i).toLowerCase()); 68 | while(splittedSn.charAt(0) == ' ') { 69 | splittedSn.deleteCharAt(0); 70 | } 71 | } 72 | 73 | System.out.println("SplitttedType: "+splittedSn); 74 | 75 | Document document = new Document(); 76 | 77 | Field SplittedTypeShortName = new Field("SplittedTypeShortName", splittedSn.toString(), 78 | Field.Store.YES, 79 | Field.Index.TOKENIZED, 80 | Field.TermVector.WITH_POSITIONS_OFFSETS); 81 | Field TypeShortName = new Field("TypeShortName", sn, 82 | Field.Store.YES, Field.Index.NO); 83 | 84 | document.add(SplittedTypeShortName); 85 | document.add(TypeShortName); 86 | indexWriter_li.addDocument(document); 87 | } 88 | 89 | indexWriter_li.optimize(); 90 | indexWriter_li.close(); 91 | 92 | // input the time of Build index 93 | long endTime = new Date().getTime(); 94 | System.out.println("TypeShortName index has build ->" + count + " " + "Time:" + (endTime - startTime)); 95 | } 96 | 97 | public static void main (String[] args) { 98 | try { 99 | Globals.localPath="D:/husen/gAnswer/"; 100 | TypeFragment.load(); 101 | BuildIndexForTypeShortName.buildIndex(TypeFragment.typeShortName2IdList); 102 | } catch (Exception e) { 103 | e.printStackTrace(); 104 | } 105 | } 106 | 107 | } 108 | -------------------------------------------------------------------------------- /src/nlp/ds/Word.java: -------------------------------------------------------------------------------- 1 | package nlp.ds; 2 | 3 | import java.util.ArrayList; 4 | 5 | import rdf.EntityMapping; 6 | import rdf.Triple; 7 | import rdf.TypeMapping; 8 | 9 | public class Word implements Comparable 10 | { 11 | public boolean mayCategory = false; 12 | public boolean mayLiteral = false; 13 | public boolean mayEnt = false; 14 | public boolean mayType = false; 15 | public boolean mayExtendVariable = false; 16 | public String category = null; 17 | public ArrayList emList = null; 18 | public ArrayList tmList = null; 19 | public Triple embbededTriple = null; 20 | 21 | public String baseForm = null; 22 | public String originalForm = null; 23 | public String posTag = null; 24 | public int position = -1; // Notice the first word's position = 1 25 | public String key = null; 26 | 27 | public boolean isCovered = false; 28 | public boolean isIgnored = false; 29 | 30 | //Notice: These variables are not used because we merge a phrase to a word if it is a node now. 31 | public String ner = null; // record NER result 32 | public Word nnNext = null; 33 | public Word nnPrev = null; 34 | public Word crr = null; // coreference resolution result 35 | 36 | public Word represent = null; // This word is represented by others, eg, "which book is ..." 
"which" 37 | public boolean omitNode = false; // This word can not be node 38 | public Word modifiedWord = null; // This word modify which word (it modify itself if it is not a modified word) 39 | 40 | public Word (String base, String original, String pos, int posi) { 41 | baseForm = base; 42 | originalForm = original; 43 | posTag = pos; 44 | position = posi; 45 | key = new String(originalForm+"["+position+"]"); 46 | } 47 | 48 | @Override 49 | public String toString() { 50 | return key; 51 | } 52 | 53 | public int compareTo(Word another) { 54 | return this.position-another.position; 55 | } 56 | 57 | @Override 58 | public int hashCode() { 59 | return key.hashCode(); 60 | } 61 | 62 | @Override 63 | public boolean equals(Object o) { 64 | return (o instanceof Word) 65 | && originalForm.equals(((Word)o).originalForm) 66 | && position == ((Word)o).position; 67 | } 68 | 69 | // We now discard all NN information and return the word itself. | husen 2016 70 | public Word getNnHead() { 71 | Word w = this; 72 | return w; 73 | 74 | // if(w.mayEnt || w.mayType) 75 | // return w; 76 | // 77 | // while (w.nnPrev != null) { 78 | // w = w.nnPrev; 79 | // } 80 | // return w; 81 | } 82 | 83 | public String getFullEntityName() { 84 | Word w = this.getNnHead(); 85 | return w.originalForm; 86 | 87 | // if(w.mayEnt || w.mayType) 88 | // return w.originalForm; 89 | // 90 | // StringBuilder sb = new StringBuilder(""); 91 | // while (w != null) { 92 | // sb.append(w.originalForm); 93 | // sb.append(' '); 94 | // w = w.nnNext; 95 | // } 96 | // sb.deleteCharAt(sb.length()-1); 97 | // return sb.toString(); 98 | } 99 | 100 | public String getBaseFormEntityName() { 101 | Word w = this.getNnHead(); 102 | if(w.mayEnt || w.mayType) 103 | return w.baseForm; 104 | 105 | StringBuilder sb = new StringBuilder(""); 106 | while (w != null) { 107 | sb.append(w.baseForm); 108 | sb.append(' '); 109 | w = w.nnNext; 110 | } 111 | sb.deleteCharAt(sb.length()-1); 112 | return sb.toString(); 113 | } 114 | 115 | public String isNER () { 116 | return this.getNnHead().ner; 117 | } 118 | 119 | public void setIsCovered () { 120 | Word w = this.getNnHead(); 121 | while (w != null) { 122 | w.isCovered = true; 123 | w = w.nnNext; 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/qa/Query.java: -------------------------------------------------------------------------------- 1 | package qa; 2 | 3 | import java.util.ArrayList; 4 | 5 | import nlp.ds.Sentence; 6 | import qa.extract.EntityRecognition; 7 | import rdf.MergedWord; 8 | 9 | /** 10 | * 1. preprocessing of question 11 | * 2. Node Recognition 12 | * @author husen 13 | */ 14 | public class Query 15 | { 16 | public String NLQuestion = null; 17 | public String TransferedQuestion = null; 18 | public ArrayList MergedQuestionList = null; 19 | public ArrayList sList = null; 20 | 21 | public String queryId = null; 22 | public String preLog = ""; 23 | 24 | public ArrayList mWordList = null; 25 | 26 | public Query(){} 27 | public Query(String _question) 28 | { 29 | NLQuestion = _question; 30 | NLQuestion = removeQueryId(NLQuestion); 31 | 32 | TransferedQuestion = getTransferedQuestion(NLQuestion); 33 | 34 | // step1. 
NODE Recognition 35 | MergedQuestionList = getMergedQuestionList(TransferedQuestion); 36 | 37 | // build Sentence 38 | sList = new ArrayList(); 39 | for(String mergedQuestion: MergedQuestionList) 40 | { 41 | Sentence sentence = new Sentence(this, mergedQuestion); 42 | sList.add(sentence); 43 | } 44 | } 45 | 46 | public boolean isDigit(char ch) 47 | { 48 | if(ch>='0' && ch<='9') 49 | return true; 50 | return false; 51 | } 52 | 53 | public boolean isUpperWord(char ch) 54 | { 55 | if(ch>='A' && ch<='Z') 56 | return true; 57 | return false; 58 | } 59 | 60 | /** 61 | * some words -> equivalent words 62 | * 1、stanfordParser often parse incorrect. 63 | * 2、Synonyms unify. eg, movie->film 64 | * @param question 65 | * @return transfered question 66 | */ 67 | public String getTransferedQuestion(String question) 68 | { 69 | //rule1: discard ".", because "." and "_" will be disconnected by parser. Discard word tail's "'", which may pollutes NER 70 | question = question.replace("' ", " "); 71 | String [] words = question.split(" "); 72 | String ret = ""; 73 | for(String word: words) 74 | { 75 | String retWord = word; 76 | //TODO: now just check NUM in head/tail 77 | if(word.length()>=2 && !isDigit(word.charAt(0)) && !isDigit(word.charAt(word.length()-1))) 78 | { 79 | retWord = retWord.replace(".", ""); 80 | } 81 | ret += retWord + " "; 82 | } 83 | if(ret.length()>1) 84 | ret = ret.substring(0,ret.length()-1); 85 | 86 | ret = ret.replace("-", " "); 87 | ret = ret.replace("in america", "in United States"); 88 | 89 | //rule2: as well as -> and 90 | ret = ret.replace("as well as", "and"); 91 | 92 | //rule3: movie -> film 93 | ret = ret.replace(" movie", " film"); 94 | ret = ret.replace(" movies", " films"); 95 | 96 | return ret; 97 | } 98 | 99 | /** 100 | * Recognize entity & type & literal in KB and replace " " in Phrases with "_" 101 | * @param question 102 | * @return merged question list 103 | */ 104 | public ArrayList getMergedQuestionList(String question) 105 | { 106 | ArrayList mergedQuestionList = null; 107 | //entity & type recognize 108 | EntityRecognition er = new EntityRecognition(); 109 | mergedQuestionList = er.process(question); 110 | preLog = er.preLog; 111 | mWordList = er.mWordList; 112 | 113 | return mergedQuestionList; 114 | } 115 | 116 | public String removeQueryId(String question) 117 | { 118 | String ret = question; 119 | int st = question.indexOf("\t"); 120 | if(st!=-1 && question.length()>1 && question.charAt(0)>='0' && question.charAt(0)<='9') 121 | { 122 | queryId = question.substring(0,st); 123 | ret = question.substring(st+1); 124 | System.out.println("Extract QueryId :"+queryId); 125 | } 126 | return ret; 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/log/QueryLogger.java: -------------------------------------------------------------------------------- 1 | package log; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | 8 | import javax.servlet.http.HttpServletRequest; 9 | 10 | import qa.Matches; 11 | import qa.Query; 12 | import rdf.EntityMapping; 13 | import rdf.SemanticRelation; 14 | import rdf.Sparql; 15 | import rdf.MergedWord; 16 | import rdf.SemanticUnit; 17 | import qa.Answer; 18 | import nlp.ds.Sentence; 19 | import nlp.ds.Word; 20 | 21 | public class QueryLogger { 22 | public Sentence s = null; 23 | public String ipAdress = null; 24 | 25 | public Word target = null; 26 | public Sparql sparql = null; 27 | public Matches match 
= null; 28 | public ArrayList answers = null; 29 | 30 | public boolean MODE_debug = false; 31 | public boolean MODE_log = true; 32 | public boolean MODE_fragment = true; 33 | public boolean isMaltParserUsed = true; // Notice, we utilize Malt Parser as default parser, which is different from the older version. TODO: some coref rules need changed to fit Malt Parser. 34 | 35 | public HashMap timeTable = null; 36 | public ArrayList mWordList = null; 37 | public ArrayList semanticUnitList = null; 38 | public HashMap semanticRelations = null; 39 | public HashMap potentialSemanticRelations = null; 40 | public HashMap> entityDictionary = null; 41 | public ArrayList rankedSparqls = null; 42 | 43 | public String NRlog = ""; 44 | public String SQGlog = ""; 45 | public int gStoreCallTimes = 0; 46 | 47 | public QueryLogger (Query query) 48 | { 49 | timeTable = new HashMap(); 50 | rankedSparqls = new ArrayList(); 51 | mWordList = query.mWordList; 52 | } 53 | 54 | public void reloadSentence(Sentence sentence) 55 | { 56 | this.s = sentence; 57 | if(this.semanticUnitList != null) 58 | this.semanticUnitList.clear(); 59 | if(this.semanticRelations != null) 60 | this.semanticRelations.clear(); 61 | if(this.rankedSparqls != null) 62 | this.rankedSparqls.clear(); 63 | } 64 | 65 | // Source code: http://edu.21cn.com/java/g_189_755584-1.htm 66 | public static String getIpAddr(HttpServletRequest request) { 67 | String ip = request.getHeader("x-forwarded-for"); 68 | if(ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) { 69 | ip = request.getHeader("Proxy-Client-IP"); 70 | } 71 | if(ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) { 72 | ip = request.getHeader("WL-Proxy-Client-IP"); 73 | } 74 | if(ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) { 75 | ip = request.getRemoteAddr(); 76 | } 77 | 78 | int idx; 79 | if((idx = ip.indexOf(',')) != -1) { 80 | ip = ip.substring(0, idx); 81 | } 82 | return ip; 83 | } 84 | 85 | public void reviseAnswers() 86 | { 87 | System.out.println("Revise Answers:"); 88 | answers = new ArrayList(); 89 | if (match == null || sparql == null || match.answers == null || sparql.questionFocus == null) 90 | return; 91 | 92 | HashSet answerSet = new HashSet(); 93 | String questionFocus = sparql.questionFocus; 94 | String sparqlString = sparql.toStringForGStore(); 95 | //System.out.println("mal="+match.answers.length); 96 | for (int i=0;iMain_project_directory 30 | >>Ganswer.jar
31 | >>unzipped files from Ganswer.jar
32 | >>data 33 | >>>unzipped files from dbpedia16.rar
34 | - 在控制台下运行jar包。 35 | ```bash 36 | java -jar Ganswer.jar 37 | ``` 38 | - 等待系统初始化结束，出现Server Ready!字样后，则说明初始化成功，您可以开始通过Http请求访问gAnswer的服务了。 39 | 40 | ### 通过http请求使用GAnswer 41 | 我们为您提供了一个简单的样例，以说明如何通过http请求，获取GAnswer服务。 42 | 您可以通过类似下面的url来访问GAnswer： 43 | http://[ip]:[port]/gSolve/?data={maxAnswerNum:1, maxSparqlNum:2, question:Who is the wife of Donald Trump?} 44 |
其中,[ip]和[port]分别为您启动GAnswer服务的ip地址和端口(端口系统默认为9999),您需要通过在http请求中添加“data”参数,传递一个json字符串给GAnswer。 45 | 在这个样例中,您实际传递的json数据为: 46 | ```json 47 | { 48 | "maxAnswerNum":"1", 49 | "maxSparqlNum":"2", 50 | "question":"Whos is the wife of Donald Trump?" 51 | } 52 | ``` 53 | 其中,maxAnswerNum和maxSparqlNum分别规定了返回的答案和sparql的数量上限,这两个数据项都是可选的。 54 | 一般情况下,这时GAnswer会返回一个json字符串,其中包含了系统生成的sparql和问题答案。 55 | ```json 56 | { 57 | "question":"Who is the wife of Donald Trump?", 58 | "vars":["?wife"], 59 | "sparql":["select DISTINCT ?wife where { \t\t?wife. } LIMIT 1","select DISTINCT ?wife where { ?wife\t\t. } LIMIT 1"], 60 | "results":{"bindings":[{"?wife":{"type":"uri","value":""}}]}, 61 | "status":"200" 62 | } 63 | ``` 64 | 详细信息可以在帮助文档的‘“2.1.1 开始使用”’一章找到。 65 | 66 | 67 | 68 | ### 使用eclipse运行 69 | 当您使用eclipse运行gAnswer系统时,只需要通过clone或者download获取工程源码,然后按正常步骤导入Eclipse工程,同时将lib中的jar包加入Build Path中即可。由于外部jar包过大,无法上传github,您可以从[此处](https://pan.baidu.com/s/18IegmEgj02fF9KQFwaQr0g)下载所有需要的外部jar包,提取码为64jd。或者通过[Google Drive](https://drive.google.com/file/d/1tEsi4pBOBHd2gmwVgIOgt-ypJZQH9G3S)下载。 70 | 这时,您同样需要下载解压dbpedia16.rar,并解压到工程文件根目录下的data文件夹中。与数据路径相关的参数,您可以在qa.Globals.localPath中找到 71 | 72 | ### 注意事项 73 | 要运行gAnswer系统,需要较多的包依赖、文件依赖和外部接口依赖,关于这部分要求,请您参阅帮助文档的“2.4 安装指南”。 74 | 在生成SPARQL查询后,系统默认调用部署在远程服务器上的gStore查询引擎来查找答案。这意味着额外的网络传输开销和可能存在的排队等待开销。 75 | 因此我们强烈建议您在自己的服务器上部署gStore查询引擎并建立对应的知识库。您需要: 76 | 77 | - 下载[DBpedia2016 triples文件](https://pan.baidu.com/s/1l5Oui65sDn8QPYmA0rUvuA),提取码89yy。 78 | - 部署[gStore](http://gstore-pku.com)查询引擎,并使用下载的triples文件来构建数据库。值得提醒的是,DBpedia 2016 triples文件大小为9.9GB,构建数据库需要较大的内存(>10GB)和较长的时间(10小时左右)。 79 | 80 | ## 其他事项 81 | 82 | 我们非常欢迎您使用gAnswer,并向我们提出您的宝贵意见或者bug报告。 83 | 84 | 如果您的意见或者报告被采纳,我们会将您的贡献记录在我们的帮助文档中。 85 | 86 | 我们针对QA任务和gAnswer系统发表了多篇论文,您可以在帮助文档的“3.2 出版物”一章找到相关信息。 87 | 88 | 89 | ## 在gAnswer上使用你自己的数据 90 | 如果您希望将您自己的三元组数据集移植到gAnswer上,那么您需要利用这些三元组为gAnswer重新生成fragments。 我们提供了一个[详细的教程](genrate_fragments/How_to_generate_fragments.md)来帮助您完成这项工作。 91 | -------------------------------------------------------------------------------- /src/qa/Globals.java: -------------------------------------------------------------------------------- 1 | package qa; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | 7 | import lcn.EntityFragmentFields; 8 | import fgmt.RelationFragment; 9 | import fgmt.TypeFragment; 10 | import paradict.ParaphraseDictionary; 11 | import qa.mapping.DBpediaLookup; 12 | import nlp.tool.NERecognizer; 13 | import nlp.tool.CoreNLP; 14 | import nlp.tool.MaltParser; 15 | import nlp.tool.StanfordParser; 16 | import nlp.tool.StopWordsList; 17 | 18 | public class Globals { 19 | // nlp tools 20 | public static CoreNLP coreNLP; 21 | public static StanfordParser stanfordParser; 22 | public static StopWordsList stopWordsList; 23 | public static MaltParser maltParser; 24 | public static NERecognizer nerRecognizer; 25 | // relation paraphrase dictionary 26 | public static ParaphraseDictionary pd; 27 | // entity linking system 28 | public static DBpediaLookup dblk; 29 | public static int MaxAnswerNum = 100; 30 | public static String Dataset = "dbpedia 2016"; 31 | public static String Version = "0.1.2"; 32 | public static String GDBsystem = "gStore v0.7.2"; 33 | 34 | /* 35 | * evaluationMethod: 36 | * 1. baseline(SQG), does not allow CIRCLE and WRONG edge. The structure may be different by changing the TARGET. 37 | * 2. super SQG, allow CIRCLE and WRONG edge. 
The structure is decided by DS tree, and can be changed in query evaluation(TOP-K match) stage. 38 | * */ 39 | public static int evaluationMethod = 2; 40 | 41 | public static String localPath = "./././"; 42 | public static String QueryEngineIP = "dbpedia16.gstore-pku.com"; // Notice, PORT number is in the evaluation function. 43 | public static int QueryEnginePort = 80; 44 | 45 | public static void init () 46 | { 47 | System.out.println("====== gAnswer2.0 over DBpedia ======"); 48 | 49 | long t1, t2, t3, t4, t5, t6, t7, t8, t9; 50 | 51 | t1 = System.currentTimeMillis(); 52 | coreNLP = new CoreNLP(); 53 | 54 | t2 = System.currentTimeMillis(); 55 | stanfordParser = new StanfordParser(); 56 | 57 | t3 = System.currentTimeMillis(); 58 | maltParser = new MaltParser(); 59 | 60 | t4 = System.currentTimeMillis(); 61 | nerRecognizer = new NERecognizer(); 62 | 63 | t5 = System.currentTimeMillis(); 64 | stopWordsList = new StopWordsList(); 65 | 66 | t6 = System.currentTimeMillis(); 67 | pd = new ParaphraseDictionary(); 68 | 69 | t7 = System.currentTimeMillis(); 70 | try 71 | { 72 | EntityFragmentFields.load(); 73 | RelationFragment.load(); 74 | TypeFragment.load(); 75 | } 76 | catch (Exception e1) { 77 | System.out.println("EntityIDs and RelationFragment and TypeFragment loading error!"); 78 | e1.printStackTrace(); 79 | } 80 | 81 | t8 = System.currentTimeMillis(); 82 | dblk = new DBpediaLookup(); 83 | 84 | t9 = System.currentTimeMillis(); 85 | System.out.println("======Initialization======"); 86 | System.out.println("CoreNLP(Lemma): " + (t2-t1) + "ms."); 87 | System.out.println("StanfordParser: " + (t3-t2) + "ms."); 88 | System.out.println("MaltParser: " + (t4-t3) + "ms."); 89 | System.out.println("NERecognizer: " + (t5-t4) + "ms."); 90 | System.out.println("StopWordsList: " + (t6-t5) + "ms."); 91 | System.out.println("ParaphraseDict & posTagPattern: " + (t7-t6) + "ms."); 92 | System.out.println("GraphFragments: " + (t8-t7) + "ms."); 93 | System.out.println("DBpediaLookup: " + (t9-t8) + "ms."); 94 | System.out.println("* Total *: " + (t9-t1) + "ms."); 95 | System.out.println("=========================="); 96 | } 97 | 98 | 99 | /** 100 | * Use as system("pause") in C 101 | */ 102 | public static void systemPause () { 103 | System.out.println("System pause ..."); 104 | BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); 105 | try { 106 | br.readLine(); 107 | } catch (IOException e) { 108 | e.printStackTrace(); 109 | } 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/lcn/BuildIndexForEntityFragments.java: -------------------------------------------------------------------------------- 1 | package lcn; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.InputStreamReader; 7 | import java.util.Date; 8 | 9 | import org.apache.lucene.analysis.Analyzer; 10 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 11 | import org.apache.lucene.document.Document; 12 | import org.apache.lucene.document.Field; 13 | import org.apache.lucene.index.IndexWriter; 14 | 15 | import qa.Globals; 16 | 17 | 18 | public class BuildIndexForEntityFragments{ 19 | public void indexforentity() throws Exception 20 | { 21 | if(EntityFragmentFields.entityId2Name == null) 22 | EntityFragmentFields.load(); 23 | 24 | long startTime = new Date().getTime(); 25 | 26 | //Try update KB index to DBpedia2015. by husen 2016-04-08 27 | //Try update KB index to DBpedia2016. 
by husen 2018-8-22 28 | File indexDir_en = new File("D:/husen/gAnswer/data/DBpedia2016/lucene/entity_fragment_index"); 29 | File sourceDir_en = new File("D:/husen/gAnswer/data/DBpedia2016/fragments/entity_RDF_fragment/16entity_fragment.txt"); 30 | 31 | Analyzer luceneAnalyzer_en = new StandardAnalyzer(); 32 | IndexWriter indexWriter_en = new IndexWriter(indexDir_en, luceneAnalyzer_en,true); 33 | 34 | int mergeFactor = 100000; //default 10 35 | int maxBufferedDoc = 1000; //default 10 36 | int maxMergeDoc = Integer.MAX_VALUE; //INF 37 | 38 | //indexWriter.DEFAULT_MERGE_FACTOR = mergeFactor; 39 | indexWriter_en.setMergeFactor(mergeFactor); 40 | indexWriter_en.setMaxBufferedDocs(maxBufferedDoc); 41 | indexWriter_en.setMaxMergeDocs(maxMergeDoc); 42 | 43 | 44 | FileInputStream file = new FileInputStream(sourceDir_en); 45 | InputStreamReader in = new InputStreamReader(file,"UTF-8"); 46 | BufferedReader br = new BufferedReader(in); 47 | 48 | int count = 0; 49 | while(true) 50 | { 51 | String _line = br.readLine(); 52 | { 53 | if(_line == null) break; 54 | } 55 | count++; 56 | if(count % 100000 == 0) 57 | System.out.println(count); 58 | 59 | String line = _line; 60 | String temp[] = line.split("\t"); 61 | 62 | if(temp.length != 2) 63 | continue; 64 | else 65 | { 66 | int entity_id = Integer.parseInt(temp[0]); 67 | if(!EntityFragmentFields.entityId2Name.containsKey(entity_id)) 68 | continue; 69 | 70 | String entity_name = EntityFragmentFields.entityId2Name.get(entity_id); 71 | String entity_fragment = temp[1]; 72 | entity_name = entity_name.replace("____", " "); 73 | entity_name = entity_name.replace("__", " "); 74 | entity_name = entity_name.replace("_", " "); 75 | 76 | 77 | Document document = new Document(); 78 | 79 | Field EntityName = new Field("EntityName", entity_name, Field.Store.YES, 80 | Field.Index.TOKENIZED, 81 | Field.TermVector.WITH_POSITIONS_OFFSETS); 82 | Field EntityId = new Field("EntityId", String.valueOf(entity_id), 83 | Field.Store.YES, Field.Index.NO); 84 | Field EntityFragment = new Field("EntityFragment", entity_fragment, 85 | Field.Store.YES, Field.Index.NO); 86 | 87 | document.add(EntityName); 88 | document.add(EntityId); 89 | document.add(EntityFragment); 90 | indexWriter_en.addDocument(document); 91 | } 92 | } 93 | 94 | indexWriter_en.optimize(); 95 | indexWriter_en.close(); 96 | br.close(); 97 | 98 | // input the time of Build index 99 | long endTime = new Date().getTime(); 100 | System.out.println("entity_name index has build ->" + count + " " + "Time:" + (endTime - startTime)); 101 | } 102 | 103 | public static void main(String[] args) 104 | { 105 | BuildIndexForEntityFragments bef = new BuildIndexForEntityFragments(); 106 | 107 | try 108 | { 109 | Globals.localPath="D:/husen/gAnswer/"; 110 | bef.indexforentity(); 111 | } 112 | catch (Exception e) 113 | { 114 | e.printStackTrace(); 115 | } 116 | } 117 | } 118 | 119 | 120 | -------------------------------------------------------------------------------- /src/nlp/ds/DependencyTreeNode.java: -------------------------------------------------------------------------------- 1 | package nlp.ds; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.Comparator; 6 | import java.util.Stack; 7 | 8 | public class DependencyTreeNode { 9 | public Word word = null; 10 | public String dep_father2child = null; 11 | 12 | public DependencyTreeNode father = null; 13 | public ArrayList childrenList = null; 14 | 15 | public int levelInTree = -1; 16 | 17 | /** 18 | * The constructor for knowing its father 
19 | * 20 | * @param w 21 | * @param dep_father2child 22 | * @param father 23 | */ 24 | public DependencyTreeNode(Word w, String dep_father2child, DependencyTreeNode father) 25 | { 26 | word = w; 27 | this.dep_father2child = dep_father2child; 28 | this.father = father; 29 | this.childrenList = new ArrayList(); 30 | 31 | if(father==null) levelInTree = 0; 32 | else levelInTree = father.levelInTree+1; 33 | } 34 | 35 | /** 36 | * The constructor for not knowing the father 37 | * 38 | * @param word 39 | */ 40 | public DependencyTreeNode(Word w) 41 | { 42 | this.word = w; 43 | this.childrenList = new ArrayList(); 44 | } 45 | 46 | public void sortChildrenList () { 47 | childrenList.trimToSize(); 48 | Collections.sort(childrenList, new DependencyTreeNodeComparator()); 49 | } 50 | 51 | @Override 52 | public String toString(){ 53 | return word.originalForm + "-" + word.posTag + "(" + dep_father2child + ")[" + word.position + "]"; 54 | } 55 | 56 | public static void sortArrayList(ArrayList list) { 57 | Collections.sort(list, new DependencyTreeNodeComparator()); 58 | } 59 | 60 | public DependencyTreeNode containDependencyWithChildren (String dep) { 61 | for (DependencyTreeNode son : childrenList) { 62 | if (son.dep_father2child.equals(dep)) return son; 63 | } 64 | return null; 65 | } 66 | 67 | /** 68 | * equal_or_startWith = true: equal 69 | * equal_or_startWith = false: startWith 70 | * 71 | * @param posChild 72 | * @param equal_or_startWith 73 | * @return 74 | */ 75 | public DependencyTreeNode containPosInChildren (String posChild, boolean equal_or_startWith) { 76 | for (DependencyTreeNode son : childrenList) { 77 | if (equal_or_startWith) { 78 | if (son.word.posTag.equals(posChild)) return son; 79 | } 80 | else { 81 | if (son.word.posTag.startsWith(posChild)) return son; 82 | } 83 | } 84 | return null; 85 | } 86 | 87 | public DependencyTreeNode containWordBaseFormInChildren (String wordBaseFormChild) { 88 | for (DependencyTreeNode son : childrenList) { 89 | if (son.word.baseForm.equals(wordBaseFormChild)) return son; 90 | } 91 | return null; 92 | } 93 | 94 | public DependencyTreeNode getNNTopTreeNode (DependencyTree T) { 95 | if(this.father != null && (this.dep_father2child.equals("nn") || (this.word.posTag.startsWith("NN") && this.dep_father2child.equals("dep")))) { 96 | return this.father.getNNTopTreeNode(T); 97 | } 98 | else return this; 99 | } 100 | 101 | public Word linkNN(DependencyTree T) { 102 | // (Now useless) backtracking the NN connections. 
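// Sketch of the intended effect, with an assumed phrase: for "Barack/NN Obama/NN"
// every node reachable over "nn" arcs is collected, sorted by sentence position,
// and chained through word.nnPrev / word.nnNext; note that the current
// Word.getNnHead() ignores this chain and simply returns the word itself.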
103 | ArrayList nn = new ArrayList(); 104 | 105 | nn.add(this); 106 | 107 | if(this.father != null && (this.dep_father2child.equals("nn") 108 | || (this.word.posTag.startsWith("NN") && this.dep_father2child.equals("dep") && this.father.word.posTag.startsWith("NN")))) { 109 | nn.add(this.father); 110 | for(DependencyTreeNode son : this.father.childrenList) { 111 | if (son != this && son.dep_father2child.equals("nn")) { 112 | nn.add(son); 113 | } 114 | } 115 | } 116 | 117 | Stack stack = new Stack(); 118 | stack.push(this); 119 | while (!stack.empty()) { 120 | DependencyTreeNode curNode = stack.pop(); 121 | for(DependencyTreeNode son : curNode.childrenList) { 122 | if (son.dep_father2child.equals("nn") 123 | || (son.word.posTag.startsWith("NN") && son.dep_father2child.equals("dep") && son.father.word.posTag.startsWith("NN"))) { 124 | nn.add(son); 125 | stack.push(son); 126 | } 127 | } 128 | } 129 | 130 | DependencyTreeNode.sortArrayList(nn); 131 | 132 | int size = nn.size() - 1; 133 | for (int i = 0; i < size; i ++) { 134 | nn.get(i).word.nnNext = nn.get(i+1).word; 135 | nn.get(i+1).word.nnPrev = nn.get(i).word; 136 | } 137 | 138 | return this.word.getNnHead(); 139 | } 140 | 141 | }; 142 | 143 | 144 | class DependencyTreeNodeComparator implements Comparator { 145 | 146 | public int compare(DependencyTreeNode n1, DependencyTreeNode n2) { 147 | return n1.word.position - n2.word.position; 148 | } 149 | 150 | } 151 | -------------------------------------------------------------------------------- /src/addition/AggregationRecognition.java: -------------------------------------------------------------------------------- 1 | package addition; 2 | 3 | import nlp.ds.DependencyTree; 4 | import nlp.ds.DependencyTreeNode; 5 | import nlp.ds.Word; 6 | import qa.Globals; 7 | import rdf.Sparql; 8 | import rdf.Triple; 9 | import log.QueryLogger; 10 | 11 | public class AggregationRecognition { 12 | 13 | // Numbers 14 | static String x[]={"zero","one","two","three","four","five","six","seven","eight","nine"}; 15 | static String y[]={"ten","eleven","twelve","thirteen","fourteen","fifteen","sixteen","seventeen","eighteen","nineteen"}; 16 | static String z[]={"twenty","thirty","forty","fifty","sixty","seventy","eighty","ninety"}; 17 | static int b; 18 | 19 | public static Integer translateNumbers(String str) // 1~100 20 | { 21 | int flag; 22 | try { 23 | b=Integer.valueOf(str); 24 | flag=1; 25 | } 26 | catch (Exception e){ 27 | flag=2; 28 | } 29 | int i,j; 30 | switch(flag) 31 | { 32 | case 1: 33 | return b; 34 | case 2: // Words need to be translated into numbers 35 | for(i=0;i<8;i++) // 20~99 36 | { 37 | for(j=0;j<10;j++) 38 | { 39 | String str1=z[i],str2=x[j]; 40 | if(str.equals((str1))){ 41 | return i*10+20; // 1x 42 | } 43 | 44 | else if(str.equals((str1+" "+str2))){ 45 | return i*10+j+20; 46 | } 47 | } 48 | } 49 | 50 | for(i=0;i<10;i++){ 51 | if(str.equals(x[i])){ 52 | return i; 53 | } 54 | else if(str.equals(y[i])){ 55 | return 10+i; 56 | } 57 | } 58 | 59 | System.out.println("Warning: Can not Translate Number: " + str); 60 | } 61 | return 1; 62 | } 63 | 64 | 65 | public void recognize(QueryLogger qlog) 66 | { 67 | DependencyTree ds = qlog.s.dependencyTreeStanford; 68 | if(qlog.isMaltParserUsed) 69 | ds = qlog.s.dependencyTreeMalt; 70 | 71 | Word[] words = qlog.s.words; 72 | 73 | // how often | how many 74 | if(qlog.s.plainText.indexOf("How many")!=-1||qlog.s.plainText.indexOf("How often")!=-1||qlog.s.plainText.indexOf("how many")!=-1||qlog.s.plainText.indexOf("how often")!=-1) 75 | { 76 | for(Sparql sp: 
qlog.rankedSparqls) 77 | { 78 | sp.countTarget = true; 79 | // How many pages does War and Peace have? --> res:War_and_Peace dbo:numberOfPages ?n . 80 | // ?uri dbo:populationTotal ?inhabitants . 81 | for(Triple triple: sp.tripleList) 82 | { 83 | String p = Globals.pd.getPredicateById(triple.predicateID).toLowerCase(); 84 | if(p.contains("number") || p.contains("total") || p.contains("calories") || p.contains("satellites")) 85 | { 86 | sp.countTarget = false; 87 | } 88 | } 89 | } 90 | } 91 | 92 | // more than [num] [node] 93 | for(DependencyTreeNode dtn: ds.nodesList) 94 | { 95 | if(dtn.word.baseForm.equals("more")) 96 | { 97 | if(dtn.father!=null && dtn.father.word.baseForm.equals("than")) 98 | { 99 | DependencyTreeNode tmp = dtn.father; 100 | if(tmp.father!=null && tmp.father.word.posTag.equals("CD") && tmp.father.father!=null && tmp.father.father.word.posTag.startsWith("N")) 101 | { 102 | DependencyTreeNode target = tmp.father.father; 103 | 104 | // Which caves have more than 3 entrances | entranceCount | filter 105 | for(Sparql sp: qlog.rankedSparqls) 106 | { 107 | if(target.father !=null && target.father.word.baseForm.equals("have")) 108 | { 109 | sp.moreThanStr = "GROUP BY ?" + qlog.target.originalForm + "\nHAVING (COUNT(?"+target.word.originalForm + ") > "+tmp.father.word.baseForm+")"; 110 | } 111 | else 112 | { 113 | int num = translateNumbers(tmp.father.word.baseForm); 114 | sp.moreThanStr = "FILTER (?"+target.word.originalForm+"> " + num + ")"; 115 | } 116 | } 117 | } 118 | } 119 | } 120 | } 121 | 122 | // most 123 | for(Word word: words) 124 | { 125 | if(word.baseForm.equals("most")) 126 | { 127 | Word modifiedWord = word.modifiedWord; 128 | if(modifiedWord != null) 129 | { 130 | for(Sparql sp: qlog.rankedSparqls) 131 | { 132 | // Which Indian company has the most employees? --> ... dbo:numberOfEmployees ?n . || ?employees dbo:company ... 133 | sp.mostStr = "ORDER BY DESC(COUNT(?"+modifiedWord.originalForm+"))\nOFFSET 0 LIMIT 1"; 134 | for(Triple triple: sp.tripleList) 135 | { 136 | String p = Globals.pd.getPredicateById(triple.predicateID).toLowerCase(); 137 | if(p.contains("number") || p.contains("total")) 138 | { 139 | sp.mostStr = "ORDER BY DESC(?"+modifiedWord.originalForm+")\nOFFSET 0 LIMIT 1"; 140 | } 141 | } 142 | } 143 | } 144 | } 145 | } 146 | } 147 | 148 | public static void main(String[] args) { 149 | System.out.println(translateNumbers("Twelve")); 150 | System.out.println(translateNumbers("thirty two")); 151 | } 152 | 153 | } 154 | -------------------------------------------------------------------------------- /src/rdf/SemanticQueryGraph.java: -------------------------------------------------------------------------------- 1 | package rdf; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.HashSet; 6 | 7 | import nlp.ds.Word; 8 | 9 | public class SemanticQueryGraph implements Comparable 10 | { 11 | public ArrayList semanticUnitList = null; 12 | public HashMap semanticRelations = new HashMap<>(); 13 | public double score = 0; 14 | 15 | public SemanticQueryGraph(ArrayList suList) 16 | { 17 | semanticUnitList = suList; //TODO: need copy? 
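// Note: suList is kept by reference, so callers that mutate the list afterwards
// also mutate this graph; the copy constructor below is what performs the
// unit-by-unit copy when an independent graph is needed.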
18 | // Calculate Score by a reward function (TODO: using SVM-Rank) 19 | } 20 | 21 | public SemanticQueryGraph(SemanticQueryGraph head) 22 | { 23 | semanticUnitList = new ArrayList<>(); 24 | for(SemanticUnit su: head.semanticUnitList) 25 | semanticUnitList.add(su.copy()); 26 | score = head.score; 27 | } 28 | 29 | public void connect(SemanticUnit u, SemanticUnit v) 30 | { 31 | if(u.equals(v)) 32 | return; 33 | 34 | SemanticUnit su1 = null, su2 = null; 35 | for(SemanticUnit su: this.semanticUnitList) 36 | if(su.equals(u)) 37 | su1 = su; 38 | else if(su.equals(v)) 39 | su2 = su; 40 | if(su1 != null && su2 != null) 41 | if(!su1.neighborUnitList.contains(su2) && !su2.neighborUnitList.contains(su1)) 42 | { 43 | su1.neighborUnitList.add(su2); 44 | su2.neighborUnitList.add(su1); 45 | } 46 | } 47 | 48 | public void merge(SemanticUnit u, SemanticUnit v) 49 | { 50 | SemanticUnit su1 = null, su2 = null; 51 | for(SemanticUnit su: this.semanticUnitList) 52 | if(su.equals(u)) 53 | su1 = su; 54 | else if(su.equals(v)) 55 | su2 = su; 56 | if(su1 != null && su2 != null) 57 | { 58 | for(SemanticUnit su: this.semanticUnitList) 59 | if(su != su2 && su.neighborUnitList.contains(su1) && !su.neighborUnitList.contains(su2)) //TODO: Notice, now REJECT multi-edges; The hash function of SR should be modified to allow multi-edges. 60 | su.neighborUnitList.add(su2); 61 | 62 | this.semanticUnitList.remove(su1); 63 | su2.neighborUnitList.remove(su1); 64 | } 65 | } 66 | 67 | @Override 68 | public int hashCode() { 69 | int code = 0; 70 | for(SemanticUnit su: this.semanticUnitList) 71 | code ^= su.hashCode(); 72 | return code; 73 | } 74 | 75 | @Override 76 | public boolean equals(Object o) 77 | { 78 | if (o instanceof SemanticQueryGraph) 79 | { 80 | int matchCnt = 0; 81 | for(SemanticUnit su1: ((SemanticQueryGraph) o).semanticUnitList) 82 | for(SemanticUnit su2: this.semanticUnitList) 83 | { 84 | if(su1.equals(su2)) 85 | { 86 | if(su1.neighborUnitList.containsAll(su2.neighborUnitList) && su2.neighborUnitList.containsAll(su1.neighborUnitList)) 87 | matchCnt++; 88 | } 89 | } 90 | if(matchCnt == ((SemanticQueryGraph) o).semanticUnitList.size() && matchCnt == this.semanticUnitList.size()) 91 | return true; 92 | } 93 | return false; 94 | } 95 | 96 | @Override 97 | public int compareTo(SemanticQueryGraph o) 98 | { 99 | double diff = this.score - o.score; 100 | if (diff > 0) return -1; 101 | else if (diff < 0) return 1; 102 | else return 0; 103 | } 104 | 105 | public boolean isFinalState() 106 | { 107 | if(semanticUnitList == null || semanticUnitList.isEmpty()) 108 | return false; 109 | 110 | // Basic assumption: a final Semantic Query Graph should be Connected. 
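// i.e. run a DFS from an arbitrary start unit and require that every unit is
// visited; any unreached unit means the graph is still disconnected.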
111 | HashSet visited = new HashSet<>(); 112 | SemanticUnit start = semanticUnitList.get(0); 113 | visited.add(start); 114 | dfs(start, visited); 115 | 116 | if(visited.size() == semanticUnitList.size()) 117 | return true; 118 | return false; 119 | } 120 | 121 | private void dfs(SemanticUnit headNode, HashSet visited) 122 | { 123 | for(SemanticUnit curNode: headNode.neighborUnitList) 124 | if(!visited.contains(curNode)) 125 | { 126 | visited.add(curNode); 127 | dfs(curNode, visited); 128 | } 129 | 130 | for(SemanticUnit curNode: semanticUnitList) 131 | { 132 | if(curNode.neighborUnitList.contains(headNode) || headNode.neighborUnitList.contains(curNode)) 133 | { 134 | if(!visited.contains(curNode)) 135 | { 136 | visited.add(curNode); 137 | dfs(curNode, visited); 138 | } 139 | } 140 | } 141 | } 142 | 143 | public void calculateScore(HashMap potentialSemanticRelations) 144 | { 145 | // 1. entity/type score 146 | double entSco = 0; 147 | for(SemanticUnit su: this.semanticUnitList) 148 | { 149 | Word w = su.centerWord; 150 | if(w.mayEnt && w.emList.size()>0) 151 | entSco += w.emList.get(0).score * 100; 152 | if(w.mayType && w.tmList.size()>0) 153 | entSco += w.tmList.get(0).score; 154 | } 155 | // 2. relation score 156 | double relSco = 0; 157 | int relCnt = 0; 158 | for(SemanticUnit su1: this.semanticUnitList) 159 | for(SemanticUnit su2: su1.neighborUnitList) 160 | { 161 | //Deduplicate 162 | if(su1.centerWord.position > su2.centerWord.position) 163 | continue; 164 | 165 | relCnt++; 166 | int key = su1.centerWord.getNnHead().hashCode() ^ su2.centerWord.getNnHead().hashCode(); 167 | SemanticRelation sr = potentialSemanticRelations.get(key); 168 | if(sr == null) 169 | System.err.println("No semantic relation for: " + su1 + " & " + su2); 170 | else 171 | { 172 | relSco += sr.predicateMappings.get(0).score; 173 | semanticRelations.put(key, sr); 174 | } 175 | } 176 | relSco/=relCnt; //average 177 | this.score = entSco + relSco; 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /src/rdf/SemanticRelation.java: -------------------------------------------------------------------------------- 1 | package rdf; 2 | 3 | import java.util.ArrayList; 4 | 5 | import rdf.SimpleRelation; 6 | 7 | import nlp.ds.Word; 8 | 9 | public class SemanticRelation { 10 | public Word arg1Word = null; 11 | public Word arg2Word = null; 12 | public String relationParaphrase = null; // longest match 13 | public double LongestMatchingScore = 0; // longest match score 14 | 15 | //judge difference when copy semantic relation from special pattern 16 | public int arg1SuffixId = 0; 17 | public int arg2SuffixId = 0; 18 | 19 | public Word arg1Word_beforeCRR = null; 20 | public Word arg2Word_beforeCRR = null; 21 | 22 | public ArrayList predicateMappings = null; 23 | 24 | public boolean isArg1Constant = false; 25 | public boolean isArg2Constant = false; 26 | 27 | public char extractingMethod = ' '; // S: StanfordParser; M: MaltParser; N: N-gram; R: rules 28 | 29 | public SemanticRelation dependOnSemanticRelation = null; 30 | public Word preferredSubj = null; 31 | 32 | public boolean isSteadyEdge = true; 33 | 34 | public SemanticRelation(SemanticRelation r2) { 35 | arg1Word = r2.arg1Word; 36 | arg2Word = r2.arg2Word; 37 | relationParaphrase = r2.relationParaphrase; 38 | LongestMatchingScore = r2.LongestMatchingScore; 39 | 40 | arg1SuffixId = r2.arg1SuffixId; 41 | arg2SuffixId = r2.arg2SuffixId; 42 | 43 | arg1Word_beforeCRR = r2.arg1Word_beforeCRR; 44 | arg2Word_beforeCRR = 
r2.arg2Word_beforeCRR; 45 | 46 | arg1Word.emList = r2.arg1Word.emList; 47 | arg2Word.emList = r2.arg2Word.emList; 48 | predicateMappings = r2.predicateMappings; 49 | 50 | // arg1Types = r2.arg1Types; 51 | // arg2Types = r2.arg2Types; 52 | 53 | isArg1Constant = r2.isArg1Constant; 54 | isArg2Constant = r2.isArg2Constant; 55 | 56 | extractingMethod = r2.extractingMethod; 57 | 58 | dependOnSemanticRelation = r2.dependOnSemanticRelation; 59 | preferredSubj = r2.preferredSubj; 60 | } 61 | 62 | public void swapArg1Arg2() 63 | { 64 | Word tmpWord = arg1Word; 65 | arg1Word = arg2Word; 66 | arg2Word = tmpWord; 67 | int tmpSuffixId = arg1SuffixId; 68 | arg1SuffixId = arg2SuffixId; 69 | arg2SuffixId = tmpSuffixId; 70 | tmpWord = arg1Word_beforeCRR; 71 | arg1Word_beforeCRR = arg2Word_beforeCRR; 72 | arg2Word_beforeCRR = tmpWord; 73 | boolean tmpBool = isArg1Constant; 74 | isArg1Constant = isArg2Constant; 75 | isArg2Constant = tmpBool; 76 | } 77 | 78 | public SemanticRelation (SimpleRelation simr) { 79 | if (simr.preferredSubj == null) { 80 | if (simr.arg1Word.compareTo(simr.arg2Word) < 0) { 81 | this.arg1Word = simr.arg1Word; 82 | this.arg2Word = simr.arg2Word; 83 | this.arg1Word_beforeCRR = simr.arg1Word_beforeCRR; 84 | this.arg2Word_beforeCRR = simr.arg2Word_beforeCRR; 85 | } 86 | else { 87 | this.arg1Word = simr.arg2Word; 88 | this.arg2Word = simr.arg1Word; 89 | this.arg1Word_beforeCRR = simr.arg2Word_beforeCRR; 90 | this.arg2Word_beforeCRR = simr.arg1Word_beforeCRR; 91 | } 92 | this.extractingMethod = simr.extractingMethod; 93 | } 94 | else { 95 | if (simr.arg1Word == simr.preferredSubj) { 96 | this.arg1Word = simr.arg1Word; 97 | this.arg2Word = simr.arg2Word; 98 | this.arg1Word_beforeCRR = simr.arg1Word_beforeCRR; 99 | this.arg2Word_beforeCRR = simr.arg2Word_beforeCRR; 100 | this.preferredSubj = simr.preferredSubj; 101 | } 102 | else { 103 | this.arg1Word = simr.arg2Word; 104 | this.arg2Word = simr.arg1Word; 105 | this.arg1Word_beforeCRR = simr.arg2Word_beforeCRR; 106 | this.arg2Word_beforeCRR = simr.arg1Word_beforeCRR; 107 | this.preferredSubj = simr.preferredSubj; 108 | } 109 | this.extractingMethod = simr.extractingMethod; 110 | } 111 | } 112 | 113 | @Override 114 | public int hashCode() { 115 | return arg1Word.hashCode() ^ arg2Word.hashCode() + arg1SuffixId + arg2SuffixId; 116 | } 117 | 118 | @Override 119 | public boolean equals(Object o) { 120 | if (o instanceof SemanticRelation) { 121 | SemanticRelation sr2 = (SemanticRelation) o; 122 | if (this.arg1Word.equals(sr2.arg1Word) 123 | && this.arg2Word.equals(sr2.arg2Word) 124 | && this.arg1SuffixId == sr2.arg1SuffixId 125 | && this.arg2SuffixId == sr2.arg2SuffixId 126 | && this.relationParaphrase.equals(sr2.relationParaphrase) 127 | && this.LongestMatchingScore == sr2.LongestMatchingScore) { 128 | return true; 129 | } 130 | } 131 | return false; 132 | } 133 | 134 | @Override 135 | public String toString() { 136 | return arg1Word.originalForm + "," + arg2Word.originalForm + "," + relationParaphrase + "," + LongestMatchingScore + "["+extractingMethod+"]"; 137 | // return arg1Word.getFullEntityName() + "," + arg2Word.getFullEntityName() + "," + relationParaphrase + "," + LongestMatchingScore + "["+extractingMethod+"]"; 138 | } 139 | 140 | public void normalizeScore() 141 | { 142 | double maxScore; 143 | 144 | if (arg1Word.emList!=null && !arg1Word.emList.isEmpty()) 145 | { 146 | maxScore=0.0; 147 | for (EntityMapping em : arg1Word.emList) 148 | maxScore = Math.max(maxScore, em.score); 149 | for (EntityMapping em : arg1Word.emList) 150 | em.score = 
em.score/maxScore; 151 | } 152 | 153 | if (arg2Word.emList!=null && !arg2Word.emList.isEmpty()) 154 | { 155 | maxScore=0.0; 156 | for (EntityMapping em : arg2Word.emList) 157 | maxScore = Math.max(maxScore, em.score); 158 | for (EntityMapping em : arg2Word.emList) 159 | em.score = em.score/maxScore; 160 | } 161 | 162 | if (predicateMappings!=null && !predicateMappings.isEmpty()) 163 | { 164 | maxScore=0.0; 165 | for (PredicateMapping pm : predicateMappings) 166 | maxScore = Math.max(maxScore, pm.score); 167 | for (PredicateMapping pm : predicateMappings) 168 | pm.score = pm.score/maxScore; 169 | } 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /src/qa/mapping/DBpediaLookup.java: -------------------------------------------------------------------------------- 1 | package qa.mapping; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | import java.util.ArrayList; 7 | import java.util.HashMap; 8 | 9 | import lcn.EntityFragmentFields; 10 | import log.QueryLogger; 11 | 12 | import org.apache.commons.httpclient.HttpClient; 13 | import org.apache.commons.httpclient.HttpException; 14 | import org.apache.commons.httpclient.methods.GetMethod; 15 | 16 | import fgmt.EntityFragment; 17 | import rdf.EntityMapping; 18 | 19 | public class DBpediaLookup { 20 | //There are two websites of the DBpediaLookup online service. 21 | //public static final String baseURL = "http://en.wikipedia.org/w/api.php?action=opensearch&format=xml&limit=10&search="; 22 | public static final String baseURL = "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?MaxHits=5&QueryString="; 23 | 24 | public HttpClient ctripHttpClient = null; 25 | 26 | //public static final String begin = ""; 27 | //public static final String begin = "\n "; 31 | public static final String end = ""; 32 | public static final int end_length = end.length(); 33 | 34 | public static HashMapentMentionDict = null; // TODO: base on redirect data & wikipedia click data to build mention2ent's dictionary, now just manually 35 | 36 | public DBpediaLookup() 37 | { 38 | ctripHttpClient = new HttpClient(); 39 | ctripHttpClient.setTimeout(3000); 40 | 41 | entMentionDict = new HashMap(); 42 | entMentionDict.put("Prince_Charles", "Charles,_Prince_of_Wales"); 43 | } 44 | 45 | public ArrayList getEntityMappings(String searchString, QueryLogger qlog) 46 | { 47 | ArrayList slist = new ArrayList(); 48 | if(entMentionDict.containsKey(searchString)) 49 | slist.add(entMentionDict.get(searchString)); 50 | else 51 | slist = lookForEntityNames(searchString, qlog); 52 | 53 | if (slist.size() == 0 && searchString.contains(". ")) 54 | slist.addAll(lookForEntityNames(searchString.replaceAll(". 
", "."), qlog)); 55 | 56 | ArrayList emlist = new ArrayList(); 57 | 58 | // Now string use "_" as delimiter (original) 59 | String[] sa = searchString.split("_"); 60 | int UpperCnt = 0; 61 | for(String str: sa) 62 | { 63 | if( (str.charAt(0)>='A'&&str.charAt(0)<='Z') || (str.charAt(0)>='0'&&str.charAt(0)<='9') ) 64 | UpperCnt ++; 65 | } 66 | 67 | System.out.print("DBpediaLookup find: " + slist + ", "); 68 | 69 | int count = 40; 70 | for (String s : slist) 71 | { 72 | //consider ABBR only when all UPPER; drop when too long edit distance 73 | if(UpperCnt < sa.length && EntityFragment.calEditDistance(s, searchString.replace("_", ""))>searchString.length()/2) 74 | continue; 75 | 76 | int eid = -1; 77 | s = s.replace(" ", "_"); 78 | if(EntityFragmentFields.entityName2Id.containsKey(s)) 79 | { 80 | eid = EntityFragmentFields.entityName2Id.get(s); 81 | emlist.add(new EntityMapping(eid, s, count)); 82 | count -=2 ; 83 | } 84 | else 85 | { 86 | System.out.print("Drop "+s+" because it not in Entity Dictionary. "); 87 | } 88 | } 89 | System.out.println("DBpediaLookup select: " + emlist); 90 | 91 | return emlist; 92 | } 93 | 94 | public ArrayList lookForEntityNames (String searchString, QueryLogger qlog) { 95 | // URL transition: " " -> %20 96 | GetMethod getMethod = new GetMethod((baseURL+searchString).replaceAll(" ", "%20")); 97 | ArrayList ret = new ArrayList(); 98 | int statusCode; 99 | 100 | try { 101 | statusCode = ctripHttpClient.executeMethod(getMethod); 102 | } catch (HttpException e) { 103 | e.printStackTrace(); 104 | return ret; 105 | } catch (IOException e) { 106 | e.printStackTrace(); 107 | return ret; 108 | } 109 | 110 | if (statusCode!=200) return null; 111 | 112 | String response = getMethod.getResponseBodyAsString(); 113 | if (qlog != null && qlog.MODE_debug) { 114 | System.out.println("searchString=" + searchString); 115 | System.out.println("statusCode=" + statusCode); 116 | System.out.println("response=" + getMethod.getResponseBodyAsString()); 117 | } 118 | getMethod.releaseConnection(); 119 | 120 | //System.out.println(response); 121 | 122 | if (response == null || response.isEmpty()) 123 | return ret; 124 | int idx1 = response.indexOf(begin); 125 | while (idx1 != -1) { 126 | int idx2 = response.indexOf(end, idx1+begin_length); 127 | String ss = response.substring(idx1+begin_length, idx2); 128 | ret.add(ss); 129 | //System.out.println(ss); 130 | idx1 = response.indexOf(begin, idx2 + end_length); 131 | } 132 | 133 | return ret; 134 | } 135 | 136 | public static void main(String argv[]){ 137 | 138 | DBpediaLookup dbplook = new DBpediaLookup(); 139 | 140 | BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); 141 | try { 142 | while (true) { 143 | System.out.println("Test DBpediaLookup."); 144 | System.out.print("Please input the search string: "); 145 | String searchString = br.readLine(); 146 | try { 147 | long t1 = System.currentTimeMillis(); 148 | ArrayList res = dbplook.lookForEntityNames(searchString, null); 149 | long t2 = System.currentTimeMillis(); 150 | System.out.println(res); 151 | System.out.println("time=" + (t2-t1) + "ms"); 152 | } catch (Exception e) { 153 | e.printStackTrace(); 154 | } 155 | } 156 | } catch (IOException e) { 157 | e.printStackTrace(); 158 | } 159 | 160 | 161 | return; 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/fgmt/TypeFragment.java: -------------------------------------------------------------------------------- 1 | package fgmt; 2 | 3 | import 
java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.IOException; 7 | import java.io.InputStreamReader; 8 | import java.util.ArrayList; 9 | import java.util.HashMap; 10 | import java.util.HashSet; 11 | 12 | import qa.Globals; 13 | 14 | 15 | public class TypeFragment extends Fragment { 16 | 17 | public static HashMap<Integer, TypeFragment> typeFragments = null; 18 | public static HashMap<String, ArrayList<Integer>> typeShortName2IdList = null; 19 | public static HashMap<Integer, String> typeId2ShortName = null; 20 | public static final int NO_RELATION = -24232; 21 | 22 | public static HashSet<String> yagoTypeList = null; 23 | 24 | public HashSet<Integer> inEdges = new HashSet<Integer>(); 25 | public HashSet<Integer> outEdges = new HashSet<Integer>(); 26 | public HashSet<Integer> entSet = new HashSet<Integer>(); 27 | 28 | /* 29 | * Eliminate some bad YAGO Types which conflict with: 30 | * 1. ENT: amazon, earth, the_hunger_game, sparkling_wine 31 | * 2. TYPE: type 32 | * 3. RELATION: flow, owner, series, shot, part, care 33 | * 4. others: peace, vice 34 | */ 35 | public static ArrayList<String> stopYagoTypeList = null; 36 | static void loadStopYagoTypeList() 37 | { 38 | stopYagoTypeList = new ArrayList<String>(); 39 | stopYagoTypeList.add("Amazon"); 40 | stopYagoTypeList.add("Earth"); 41 | stopYagoTypeList.add("TheHungerGames"); 42 | stopYagoTypeList.add("SparklingWine"); 43 | stopYagoTypeList.add("Type"); 44 | stopYagoTypeList.add("Flow"); 45 | stopYagoTypeList.add("Owner"); 46 | stopYagoTypeList.add("Series"); 47 | stopYagoTypeList.add("Shot"); 48 | stopYagoTypeList.add("Part"); 49 | stopYagoTypeList.add("Care"); 50 | stopYagoTypeList.add("Peace"); 51 | stopYagoTypeList.add("Vice"); 52 | stopYagoTypeList.add("Dodo"); 53 | stopYagoTypeList.add("CzechFilms"); 54 | stopYagoTypeList.add("ChineseFilms"); 55 | } 56 | 57 | public TypeFragment(String fgmt, int fid) 58 | { 59 | fragmentId = fid; 60 | fragmentType = typeEnum.TYPE_FRAGMENT; 61 | 62 | fgmt = fgmt.replace('|', '#'); 63 | String[] ss = fgmt.split("#"); 64 | String[] nums; 65 | 66 | if (ss[0].length() > 0) { 67 | nums = ss[0].split(","); 68 | for(int i = 0; i < nums.length; i ++) { 69 | if (nums[i].length() > 0) { 70 | inEdges.add(Integer.parseInt(nums[i])); 71 | } 72 | } 73 | } 74 | else { 75 | inEdges.add(NO_RELATION); 76 | } 77 | 78 | if (ss.length > 1 && ss[1].length() > 0) { 79 | nums = ss[1].split(","); 80 | for(int i = 0; i < nums.length; i ++) { 81 | if (nums[i].length() > 0) { 82 | outEdges.add(Integer.parseInt(nums[i])); 83 | } 84 | } 85 | } 86 | else { 87 | outEdges.add(NO_RELATION); 88 | } 89 | 90 | if(ss.length > 2 && ss[2].length() > 0) 91 | { 92 | nums = ss[2].split(","); 93 | for(int i = 0; i < nums.length; i ++) { 94 | if (nums[i].length() > 0) { 95 | entSet.add(Integer.parseInt(nums[i])); 96 | } 97 | } 98 | } 99 | } 100 | 101 | public static void load() throws Exception 102 | { 103 | String filename = Globals.localPath+"data/DBpedia2016/fragments/class_RDF_fragment/16type_fragment.txt"; 104 | 105 | File file = new File(filename); 106 | InputStreamReader in = new InputStreamReader(new FileInputStream(file),"utf-8"); 107 | BufferedReader br = new BufferedReader(in); 108 | 109 | typeFragments = new HashMap<Integer, TypeFragment>(); 110 | 111 | System.out.println("Loading type IDs and Fragments ..."); 112 | String line; 113 | while((line = br.readLine()) != null) { 114 | String[] lines = line.split("\t"); 115 | TypeFragment tfgmt = null; 116 | if(lines[0].length() > 0 && !lines[0].equals("literal")) { 117 | int tid = Integer.parseInt(lines[0]); 118 | try{tfgmt = new TypeFragment(lines[1], tid);} 119 | catch(Exception e){} 120 | 121 | 122 | 
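// NOTE: if the constructor above threw, tfgmt is still null here but is stored anyway, so readers of typeFragments must be prepared for null values.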
typeFragments.put(tid, tfgmt); 123 | } 124 | } 125 | 126 | br.close(); 127 | 128 | // some data could be fixed up here 129 | // load Type Id 130 | loadId(); 131 | System.out.println("Load "+typeId2ShortName.size()+" basic types and "+yagoTypeList.size()+" yago types."); 132 | } 133 | 134 | public static void loadId() throws IOException 135 | { 136 | String filename = Globals.localPath+"data/DBpedia2016/fragments/id_mappings/16basic_types_id.txt"; 137 | String yagoFileName = Globals.localPath+"data/DBpedia2016/fragments/id_mappings/16yago_types_list.txt"; 138 | 139 | File file = new File(filename); 140 | InputStreamReader in = new InputStreamReader(new FileInputStream(file),"utf-8"); 141 | BufferedReader br = new BufferedReader(in); 142 | 143 | typeShortName2IdList = new HashMap<String, ArrayList<Integer>>(); 144 | typeId2ShortName = new HashMap<Integer, String>(); 145 | 146 | String line; 147 | while((line = br.readLine()) != null) { 148 | String[] lines = line.split("\t"); 149 | String typeShortName = lines[0]; 150 | // preserve typeShortName's capitalization 151 | if (!typeShortName2IdList.containsKey(typeShortName)) { 152 | typeShortName2IdList.put(typeShortName, new ArrayList<Integer>()); 153 | } 154 | typeShortName2IdList.get(typeShortName).add(Integer.parseInt(lines[1])); 155 | typeId2ShortName.put(Integer.parseInt(lines[1]), typeShortName); 156 | } 157 | 158 | // literalType 159 | typeShortName2IdList.put("literal_HRZ", new ArrayList<Integer>()); 160 | typeShortName2IdList.get("literal_HRZ").add(RelationFragment.literalTypeId); 161 | typeId2ShortName.put(RelationFragment.literalTypeId, "literal_HRZ"); 162 | 163 | br.close(); 164 | 165 | //load YAGO types 166 | in = new InputStreamReader(new FileInputStream(yagoFileName),"utf-8"); 167 | br = new BufferedReader(in); 168 | yagoTypeList = new HashSet<String>(); 169 | while((line = br.readLine())!=null) 170 | { 171 | String[] lines = line.split("\t"); 172 | String typeName = lines[0]; 173 | yagoTypeList.add(typeName); 174 | } 175 | 176 | loadStopYagoTypeList(); 177 | yagoTypeList.removeAll(stopYagoTypeList); 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /src/lcn/SearchInTypeShortName.java: -------------------------------------------------------------------------------- 1 | package lcn; 2 | 3 | import java.util.ArrayList; 4 | 5 | import org.apache.lucene.analysis.Analyzer; 6 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 7 | import org.apache.lucene.queryParser.ParseException; 8 | import org.apache.lucene.queryParser.QueryParser; 9 | import org.apache.lucene.search.Hits; 10 | import org.apache.lucene.search.IndexSearcher; 11 | import org.apache.lucene.search.Query; 12 | 13 | import fgmt.TypeFragment; 14 | import qa.Globals; 15 | import rdf.TypeMapping; 16 | 17 | public class SearchInTypeShortName { 18 | // get id and score -- husen 19 | public ArrayList<TypeMapping> searchTypeScore(String s, double thres1, double thres2, int k) throws Exception 20 | { 21 | Hits hits = null; 22 | String queryString = s; 23 | Query query = null; 24 | 25 | IndexSearcher searcher = new IndexSearcher(Globals.localPath+"data/DBpedia2016/lucene/type_fragment_index"); 26 | 27 | ArrayList<TypeMapping> tmList = new ArrayList<TypeMapping>(); 28 | 29 | Analyzer analyzer = new StandardAnalyzer(); 30 | try { 31 | QueryParser qp = new QueryParser("SplittedTypeShortName", analyzer); 32 | query = qp.parse(queryString); 33 | } catch (ParseException e) { 34 | e.printStackTrace(); 35 | } 36 | 37 | if (searcher != null) { 38 | hits = searcher.search(query); 39 | 40 | //System.out.println("find " + hits.length() + " 
matched type."); 41 | if (hits.length() > 0) { 42 | for (int i=0; i= thres1) 46 | { 47 | //System.out.println("Score>=thres1("+thres1+") ---" + hits.doc(i).get("TypeShortName") + " : " + hits.score(i)); 48 | String type = hits.doc(i).get("TypeShortName"); 49 | System.out.println("Matched type: " + type + " : " + hits.score(i)); 50 | 51 | ArrayList ret_in = TypeFragment.typeShortName2IdList.get(type); 52 | if(ret_in!=null) 53 | { 54 | for(Integer tid: ret_in) 55 | { 56 | TypeMapping typeMapping = new TypeMapping(tid, hits.doc(i).get("TypeShortName"), hits.score(i)); 57 | tmList.add(typeMapping); 58 | } 59 | } 60 | } 61 | else { 62 | break; 63 | } 64 | } 65 | else { 66 | if(hits.score(i) >= thres2) 67 | { 68 | System.out.println("<<<<---" + hits.doc(i).get("TypeShortName") + " : " + hits.score(i)); 69 | 70 | ArrayList ret_in = TypeFragment.typeShortName2IdList.get(s); 71 | if(ret_in!=null) 72 | { 73 | for(Integer tid: ret_in) 74 | { 75 | TypeMapping typeMapping = new TypeMapping(tid, hits.doc(i).get("TypeShortName"), hits.score(i)); 76 | tmList.add(typeMapping); 77 | } 78 | } 79 | } 80 | else { 81 | break; 82 | } 83 | } 84 | } 85 | } 86 | } 87 | return tmList; 88 | } 89 | 90 | public ArrayList searchType(String s, double thres1, double thres2, int k) throws Exception 91 | { 92 | Hits hits = null; 93 | String queryString = null; 94 | Query query = null; 95 | 96 | IndexSearcher searcher = new IndexSearcher(Globals.localPath+"data/DBpedia2016/lucene/type_fragment_index"); 97 | 98 | ArrayList typeNames = new ArrayList(); 99 | 100 | //String[] array = s.split(" "); 101 | //queryString = array[array.length-1]; 102 | queryString = s; 103 | 104 | Analyzer analyzer = new StandardAnalyzer(); 105 | try { 106 | QueryParser qp = new QueryParser("SplittedTypeShortName", analyzer); 107 | query = qp.parse(queryString); 108 | } catch (ParseException e) { 109 | e.printStackTrace(); 110 | } 111 | 112 | if (searcher != null) { 113 | hits = searcher.search(query); 114 | 115 | System.out.println("find " + hits.length() + " answars!"); 116 | if (hits.length() > 0) { 117 | for (int i=0; i= thres1){ 121 | System.out.println("Score>=thres1("+thres1+") ---" + hits.doc(i).get("TypeShortName") + " : " + hits.score(i)); 122 | typeNames.add(hits.doc(i).get("TypeShortName")); 123 | //if (satisfiedStrictly(hits.doc(i).get("SplittedTypeShortName"), queryString)) typeNames.add(hits.doc(i).get("TypeShortName")); 124 | } 125 | else { 126 | //break; 127 | } 128 | } 129 | else { 130 | if(hits.score(i) >= thres2){ 131 | System.out.println("<<<<---" + hits.doc(i).get("TypeShortName") + " : " + hits.score(i)); 132 | typeNames.add(hits.doc(i).get("TypeShortName")); 133 | //if (satisfiedStrictly(hits.doc(i).get("SplittedTypeShortName"), queryString)) typeNames.add(hits.doc(i).get("TypeShortName")); 134 | } 135 | else { 136 | break; 137 | } 138 | } 139 | } 140 | } 141 | } 142 | return typeNames; 143 | } 144 | 145 | private boolean satisfiedStrictly (String splittedTypeShortName, String queryString) 146 | { 147 | String[] tnames = splittedTypeShortName.toLowerCase().split(" "); 148 | String[] qnames = queryString.toLowerCase().split(" "); 149 | for (int i = 0; i < tnames.length; i ++) { 150 | if (tnames[i].length() == 0) continue; 151 | boolean matched = false; 152 | for (int j = 0; j < qnames.length; j ++) { 153 | if (tnames[i].equals(qnames[j])) { 154 | matched = true; 155 | break; 156 | } 157 | } 158 | if (!matched && !Globals.stopWordsList.isStopWord(tnames[i])) { 159 | return false; 160 | } 161 | } 162 | String qlast = 
qnames[qnames.length-1]; 163 | boolean flag = false; 164 | for (int i = 0; i < tnames.length; i ++) { 165 | if (tnames[i].length() == 0) continue; 166 | if (tnames[i].equals(qlast)) { 167 | flag = true; 168 | break; 169 | } 170 | } 171 | 172 | if (flag) return true; 173 | else return false; 174 | } 175 | 176 | } 177 | -------------------------------------------------------------------------------- /src/qa/extract/CorefResolution.java: -------------------------------------------------------------------------------- 1 | package qa.extract; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashSet; 5 | 6 | import qa.Globals; 7 | 8 | import log.QueryLogger; 9 | 10 | import nlp.ds.DependencyTree; 11 | import nlp.ds.DependencyTreeNode; 12 | import nlp.ds.Word; 13 | import rdf.SimpleRelation; 14 | 15 | public class CorefResolution { 16 | /** 17 | * 1. A very simple reference resolution. 18 | * 2. Coref resolution should be done after relation extraction and before item mapping. 19 | */ 20 | public void process(ArrayList<SimpleRelation> simpleRelations, QueryLogger qlog) { 21 | if (qlog.s.words.length <= 4) return; // if the sentence is too short, skip the coref step. 22 | System.out.println("=====Co-reference resolution======="); 23 | ArrayList<SimpleRelation> deleteList = new ArrayList<SimpleRelation>(); 24 | 25 | for(SimpleRelation sr : simpleRelations) { 26 | Word w1=null, w2=null; 27 | 28 | if (sr.extractingMethod == 'S') { 29 | w1 = getRefWord(sr.arg1Word.getNnHead(), qlog.s.dependencyTreeStanford, qlog); 30 | w2 = getRefWord(sr.arg2Word.getNnHead(), qlog.s.dependencyTreeStanford, qlog); 31 | } 32 | else if (sr.extractingMethod == 'M') { 33 | w1 = getRefWord(sr.arg1Word.getNnHead(), qlog.s.dependencyTreeMalt, qlog); 34 | w2 = getRefWord(sr.arg2Word.getNnHead(), qlog.s.dependencyTreeMalt, qlog); 35 | } 36 | else { 37 | continue; 38 | } 39 | 40 | if (w1 != null) { 41 | sr.arg1Word_beforeCRR = sr.arg1Word; 42 | sr.arg1Word = w1; 43 | } 44 | if (w2 != null) { 45 | sr.arg2Word_beforeCRR = sr.arg2Word; 46 | sr.arg2Word = w2; 47 | } 48 | 49 | if (sr.arg1Word == sr.arg2Word) 50 | deleteList.add(sr); 51 | } 52 | 53 | simpleRelations.removeAll(deleteList); 54 | 55 | printCRR(qlog); 56 | System.out.println("==================================="); 57 | } 58 | 59 | // return the reference word of w 60 | public Word getRefWord (Word w, DependencyTree dt, QueryLogger qlog) { 61 | w = w.getNnHead(); 62 | 63 | if (w.crr != null) { 64 | return w.crr; 65 | } 66 | 67 | /* 68 | * method: (suitable for stanford parser (old version)) 69 | * (1) WDT --det--> [] eg: Which city is located in China? 70 | * (2) WDT -------> V/J --rcmod--> [] eg: Who is married to someone that was born in Rome? 71 | * (3) W -------> be <------- [] eg: Who is the author of WikiLeaks? // discarded: "when is the sth" conflicts with this rule 72 | * (4) WDT -------> V --ccomp--> [] eg: The actor that married the child of a politician. 73 | * (5) DT(that, which) --dep--> V eg: The actors that married an athlete. // DS parser error. 74 | * (6) W(position=1) ------> NN eg: What are the language used in China? // DS parser error, should eliminate "WRB": When was Carlo Giuliani shot? 75 | * (7) where <--advmod-- V <--advcl-- V --prep/pobj--> [] eg: Who graduate from the school where Keqiang Li graduates? 
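 * Each rule maps the leading wh-word (or "that"/"which") to the head noun it refers to; getRefWord caches the result in Word.crr.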
76 | */ 77 | 78 | DependencyTreeNode dtn = dt.getNodeByIndex(w.position); 79 | 80 | // no need for root 81 | if (dtn.father == null) return null; 82 | 83 | try { 84 | if(dtn.word.posTag.equals("WDT") && dtn.dep_father2child.equals("det")) { // (1) 85 | if(qlog.MODE_debug) System.out.println(w + "-->" + dtn.father.word.getNnHead()); 86 | w.crr = dtn.father.word.getNnHead(); 87 | } 88 | else if(dtn.word.posTag.startsWith("W") && !dtn.word.posTag.equals("WRB") && dtn.word.position == 1 && dtn.father.word.posTag.equals("NN")) { // (6) 89 | if(qlog.MODE_debug) System.out.println(w + "-->" + dtn.father.word.getNnHead()); 90 | w.crr = dtn.father.word.getNnHead(); 91 | } 92 | else if(dtn.word.posTag.equals("DT") 93 | && dtn.dep_father2child.equals("dep") 94 | && (dtn.word.baseForm.equals("that")||dtn.word.baseForm.equals("which"))) { // (5) 95 | if(qlog.MODE_debug) System.out.println(w + "-->" + dtn.father.word.getNnHead()); 96 | w.crr = dtn.father.word.getNnHead(); 97 | } 98 | // else if(dtn.word.posTag.startsWith("W") 99 | // && dtn.father.word.baseForm.equals("be")) { // (3) //&& dtn.dep_father2child.equals("attr") 100 | // DependencyTreeNode target = dtn.father.containDependencyWithChildren("nsubj"); 101 | // if (target != null) { 102 | // if(qlog.MODE_debug) System.out.println(w + "-->" + target.word.getNnHead()); 103 | // w.crr = target.word.getNnHead(); 104 | // } 105 | // } 106 | else if(dtn.word.posTag.equals("WDT") 107 | && (dtn.father.word.posTag.startsWith("V") || dtn.father.word.posTag.startsWith("J")) 108 | && dtn.father.dep_father2child.equals("rcmod")) { // (2) 109 | if(qlog.MODE_debug) System.out.println(w + "-->" + dtn.father.father.word.getNnHead()); 110 | w.crr = dtn.father.father.word.getNnHead(); 111 | } 112 | else if(dtn.word.posTag.equals("WDT") 113 | && dtn.father.word.posTag.startsWith("V") 114 | && dtn.father.dep_father2child.equals("ccomp")) { // (4) 115 | if(qlog.MODE_debug) System.out.println(w + "-->" + dtn.father.father.word.getNnHead()); 116 | w.crr = dtn.father.father.word.getNnHead(); 117 | } 118 | else if (dtn.word.baseForm.equals("where") 119 | && dtn.dep_father2child.equals("advmod") 120 | && dtn.father.dep_father2child.equals("advcl")) { // (7) 121 | DependencyTreeNode target = dtn.father.father.containDependencyWithChildren("prep"); 122 | if (target != null) { 123 | target = target.containDependencyWithChildren("pobj"); 124 | } 125 | else { 126 | for (DependencyTreeNode n : dtn.father.father.childrenList) { 127 | if (Globals.pd.relns_object.contains(n.dep_father2child)) { 128 | target = n; 129 | } 130 | } 131 | } 132 | if (target != null) { 133 | if(qlog.MODE_debug) System.out.println(w + "-->" + target.word.getNnHead()); 134 | w.crr = target.word.getNnHead(); 135 | } 136 | } 137 | } catch (Exception e) {} 138 | 139 | return w.crr; 140 | } 141 | 142 | public void printCRR (QueryLogger qlog) { 143 | HashSet<Word> printed = new HashSet<Word>(); 144 | for (Word w : qlog.s.words) { 145 | w = w.getNnHead(); 146 | if (printed.contains(w)) 147 | continue; 148 | if (w.crr != null) 149 | System.out.println("\""+w.getFullEntityName() + "\" is resolved to \"" + w.crr.getFullEntityName() + "\""); 150 | printed.add(w); 151 | } 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/rdf/Sparql.java: -------------------------------------------------------------------------------- 1 | package rdf; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | 8 | 
import qa.Globals; 9 | 10 | public class Sparql implements Comparable<Sparql> 11 | { 12 | public ArrayList<Triple> tripleList = new ArrayList<Triple>(); 13 | public boolean countTarget = false; 14 | public String mostStr = null; 15 | public String moreThanStr = null; 16 | public double score = 0; 17 | 18 | public String questionFocus = null; // The answer variable 19 | public HashSet<String> variables = new HashSet<String>(); 20 | 21 | public enum QueryType {Select,Ask} 22 | public QueryType queryType = QueryType.Select; 23 | 24 | public HashMap semanticRelations = null; 25 | 26 | public void addTriple(Triple t) 27 | { 28 | if(!tripleList.contains(t)) 29 | { 30 | tripleList.add(t); 31 | score += t.score; 32 | } 33 | } 34 | 35 | public void delTriple(Triple t) 36 | { 37 | if(tripleList.contains(t)) 38 | { 39 | tripleList.remove(t); 40 | score -= t.score; 41 | } 42 | } 43 | 44 | @Override 45 | public String toString() 46 | { 47 | String ret = ""; 48 | for (Triple t : tripleList) { 49 | ret += t.toString(); 50 | ret += '\n'; 51 | } 52 | return ret; 53 | } 54 | 55 | public void deduplicate() 56 | { 57 | HashSet<String> set = new HashSet<String>(); 58 | ArrayList<Triple> list = new ArrayList<Triple>(); 59 | for(Triple t: tripleList) 60 | { 61 | String st = t.toStringWithoutScore(); 62 | if(set.contains(st)) 63 | list.add(t); 64 | set.add(st); 65 | } 66 | for(Triple t: list) 67 | this.delTriple(t); 68 | } 69 | 70 | // Is it a Basic Graph Pattern without filter and aggregation? 71 | public boolean isBGP() 72 | { 73 | if(moreThanStr != null || mostStr != null || countTarget) 74 | return false; 75 | return true; 76 | } 77 | 78 | // For display only (cannot be executed) 79 | public String toStringForGStore() 80 | { 81 | String ret = ""; 82 | for (Triple t : tripleList) 83 | { 84 | // !Omit obvious LITERAL 85 | if(t.object.equals("literal_HRZ")) 86 | continue; 87 | 88 | // !Omit some bad TYPEs 89 | if(t.predicateID==Globals.pd.typePredicateID && Globals.pd.bannedTypes.contains(t.object)) 90 | continue; 91 | 92 | ret += t.toStringForGStore(); 93 | ret += '\n'; 94 | } 95 | return ret; 96 | } 97 | 98 | /** 99 | * @description: 100 | * 1. Select all variables for BGP queries to display specific information. 101 | * 2. DO NOT select all variables when using aggregation like "HAVING" or "COUNT" ... 102 | * (It may involve too many results; e.g., for "which countries have more than 1000 caves?", the caves need not be displayed.) 103 | * @param: NULL. 104 | * @return: A SPARQL query that can be executed by GStore (NO prefix on entities/predicates). 105 | */ 106 | public String toStringForGStore2() 107 | { 108 | String ret = ""; 109 | variables.clear(); 110 | for(Triple t: tripleList) 111 | { 112 | if (!t.isSubjConstant()) variables.add(t.subject.replaceAll(" ", "_")); 113 | if (!t.isObjConstant()) variables.add(t.object.replaceAll(" ", "_")); 114 | } 115 | if(variables.size() == 0) 116 | queryType = QueryType.Ask; 117 | 118 | // part1: select / ask ... 119 | if (queryType==QueryType.Ask) 120 | ret += "ask"; 121 | else if(countTarget) 122 | ret += ("select COUNT(DISTINCT " + questionFocus + ")"); 123 | else 124 | { 125 | if(!isBGP()) // AGG: select question focus 126 | ret += ("select DISTINCT " + questionFocus); 127 | else // BGP: select all variables 128 | { 129 | ret += "select DISTINCT "; 130 | for (String v : variables) 131 | ret += v + " "; 132 | } 133 | } 134 | 135 | // part2: triples 136 | ret += " where { "; 137 | for(Triple t : tripleList) 138 | { 139 | if (!t.object.equals("literal_HRZ")) { // need not display literal 140 | ret += t.toStringForGStore(); 141 | ret += ". 
"; 142 | } 143 | } 144 | ret += "} "; 145 | 146 | // part3: order by / group by ... 147 | if(moreThanStr != null) 148 | ret += moreThanStr+" "; 149 | if(mostStr != null) 150 | ret += mostStr+" "; 151 | 152 | // part4: limit 153 | if(queryType != QueryType.Ask && (mostStr == null || !mostStr.contains("LIMIT"))) 154 | ret += "LIMIT " + Globals.MaxAnswerNum; 155 | 156 | return ret; 157 | } 158 | 159 | public int getVariableNumber() 160 | { 161 | int res = 0; 162 | for (Triple t: tripleList) 163 | { 164 | if (!t.isSubjConstant()) res++; 165 | if (!t.isObjConstant()) res++; 166 | } 167 | return res; 168 | } 169 | 170 | public void adjustTriplesOrder() 171 | { 172 | Collections.sort(this.tripleList); 173 | } 174 | 175 | public int compareTo(Sparql o) 176 | { 177 | double diff = this.score - o.score; 178 | if (diff > 0) 179 | return -1; 180 | else if (diff < 0) 181 | return 1; 182 | else 183 | return 0; 184 | } 185 | 186 | @Override 187 | public int hashCode() 188 | { 189 | int key = 0; 190 | for(Triple t: this.tripleList) 191 | key ^= t.hashCode(); 192 | return key; 193 | } 194 | 195 | @Override 196 | public boolean equals(Object spq) 197 | { 198 | Sparql tempSparql= (Sparql) spq; 199 | if(this.toStringForGStore2().equals(tempSparql.toStringForGStore2())) 200 | return true; 201 | else 202 | return false; 203 | } 204 | 205 | public Sparql(){} 206 | public Sparql(HashMap semanticRelations) 207 | { 208 | this.semanticRelations = semanticRelations; 209 | } 210 | 211 | public Sparql copy() 212 | { 213 | Sparql spq = new Sparql(this.semanticRelations); 214 | for (Triple t : this.tripleList) 215 | spq.addTriple(t); 216 | return spq; 217 | } 218 | 219 | public void removeLastTriple() 220 | { 221 | int idx = tripleList.size()-1; 222 | score -= tripleList.get(idx).score; 223 | tripleList.remove(idx); 224 | } 225 | 226 | public Sparql removeAllTypeInfo () 227 | { 228 | score = 0; 229 | ArrayList newTripleList = new ArrayList(); 230 | for (Triple t : tripleList) 231 | { 232 | if (t.predicateID != Globals.pd.typePredicateID) 233 | { 234 | newTripleList.add(t); 235 | score += t.score; 236 | } 237 | } 238 | tripleList = newTripleList; 239 | return this; 240 | } 241 | 242 | }; 243 | -------------------------------------------------------------------------------- /src/qa/parsing/QuestionParsing.java: -------------------------------------------------------------------------------- 1 | package qa.parsing; 2 | 3 | import log.QueryLogger; 4 | import nlp.ds.DependencyTree; 5 | import nlp.ds.DependencyTreeNode; 6 | import nlp.ds.Word; 7 | import nlp.ds.Sentence.SentenceType; 8 | import qa.Globals; 9 | import rdf.Sparql; 10 | import rdf.Triple; 11 | 12 | public class QuestionParsing { 13 | public void process(QueryLogger qlog) { 14 | getDependenciesAndNER(qlog); 15 | recognizeSentenceType(qlog); 16 | } 17 | 18 | public void getDependenciesAndNER (QueryLogger qlog) { 19 | long t1 = System.currentTimeMillis(); 20 | try { 21 | qlog.s.dependencyTreeStanford = new DependencyTree(qlog.s, Globals.stanfordParser); 22 | }catch(Exception e){ 23 | e.printStackTrace(); 24 | } 25 | 26 | long t2 = System.currentTimeMillis(); 27 | try{ 28 | qlog.s.dependencyTreeMalt = new DependencyTree(qlog.s, Globals.maltParser); 29 | }catch(Exception e){ 30 | //if errors occur, abandon malt tree 31 | qlog.s.dependencyTreeMalt = qlog.s.dependencyTreeStanford; 32 | System.err.println("MALT parser error! 
Use stanford parser instead."); 33 | } 34 | 35 | try { 36 | long t3 = System.currentTimeMillis(); 37 | Globals.nerRecognizer.recognize(qlog.s); 38 | long t4 = System.currentTimeMillis(); 39 | System.out.println("====StanfordDependencies("+(t2-t1)+"ms)===="); 40 | System.out.println(qlog.s.dependencyTreeStanford); 41 | System.out.println("====MaltDependencies("+(t3-t2)+"ms)===="); 42 | System.out.println(qlog.s.dependencyTreeMalt); 43 | System.out.println("====NameEntityRecognition("+(t4-t3)+"ms)===="); 44 | qlog.s.printNERResult(); 45 | 46 | qlog.timeTable.put("StanfordParser", (int)(t2-t1)); 47 | qlog.timeTable.put("MaltParser", (int)(t3-t2)); 48 | qlog.timeTable.put("NER", (int)(t4-t3)); 49 | } catch (Exception e) { 50 | e.printStackTrace(); 51 | } 52 | } 53 | 54 | public void recognizeSentenceType(QueryLogger qlog) 55 | { 56 | boolean IsImperativeSentence = recognizeImperativeSentence(qlog.s.dependencyTreeStanford)|| 57 | recognizeImperativeSentence(qlog.s.dependencyTreeMalt); 58 | if (IsImperativeSentence) 59 | { 60 | qlog.s.sentenceType = SentenceType.ImperativeSentence; 61 | //two dependencyTree's ignored words should equal 62 | for (DependencyTreeNode sNode : qlog.s.dependencyTreeStanford.nodesList) 63 | for (DependencyTreeNode mNode : qlog.s.dependencyTreeMalt.nodesList) 64 | if (sNode.equals(mNode) && (sNode.word.isIgnored||mNode.word.isIgnored)) 65 | sNode.word.isIgnored = mNode.word.isIgnored = true; 66 | return; 67 | } 68 | 69 | boolean IsSpecialQuestion = recognizeSpecialQuestion(qlog.s.dependencyTreeStanford)|| 70 | recognizeSpecialQuestion(qlog.s.dependencyTreeMalt); 71 | if (IsSpecialQuestion) 72 | { 73 | qlog.s.sentenceType = SentenceType.SpecialQuestion; 74 | return; 75 | } 76 | 77 | boolean IsGeneralQuestion = recognizeGeneralQuestion(qlog.s.dependencyTreeStanford)|| 78 | recognizeGeneralQuestion(qlog.s.dependencyTreeMalt); 79 | if (IsGeneralQuestion) 80 | { 81 | qlog.s.sentenceType = SentenceType.GeneralQuestion; 82 | return; 83 | } 84 | 85 | //default is special 86 | qlog.s.sentenceType = SentenceType.SpecialQuestion; 87 | 88 | } 89 | 90 | //if imperative, omitting those polite words 91 | private boolean recognizeImperativeSentence(DependencyTree tree) { 92 | if(tree.getRoot().word.posTag.startsWith("V") || tree.getRoot().word.posTag.startsWith("NN")) { 93 | DependencyTreeNode dobj = null; 94 | DependencyTreeNode iobj = null; 95 | for (DependencyTreeNode n : tree.getRoot().childrenList) { 96 | if (n.dep_father2child.equals("dobj")) { 97 | dobj = n; 98 | } 99 | else if (n.dep_father2child.equals("iobj")) { 100 | iobj = n; 101 | } 102 | } 103 | if (dobj != null && iobj != null) { 104 | tree.getRoot().word.isIgnored = true; 105 | iobj.word.isIgnored = true; 106 | 107 | // give me a list of .. 108 | if (dobj.word.baseForm.equals("list")) 109 | { 110 | dobj.word.isIgnored = true; 111 | } 112 | 113 | return true; 114 | } 115 | 116 | //start with "List": List all games by GMT. 117 | if (dobj != null && tree.getRoot().word.baseForm.equals("list")) 118 | { 119 | //System.out.println("isListSentence!"); 120 | tree.getRoot().word.isIgnored = true; 121 | 122 | return true; 123 | } 124 | } 125 | return false; 126 | } 127 | 128 | private boolean recognizeSpecialQuestion(DependencyTree tree) 129 | { 130 | DependencyTreeNode firstNode = null; 131 | for (DependencyTreeNode dtn : tree.nodesList) 132 | if (dtn.word.position == 1) 133 | { 134 | firstNode = dtn; 135 | break; 136 | } 137 | //eg. In which city... 
138 | if (firstNode!=null && 139 | (firstNode.word.posTag.equals("IN")||firstNode.word.posTag.equals("TO"))&& 140 | firstNode.dep_father2child.startsWith("prep")) 141 | { 142 | firstNode = null; 143 | for (DependencyTreeNode dtn : tree.nodesList) 144 | if (dtn.word.position == 2) 145 | { 146 | firstNode = dtn; 147 | break; 148 | } 149 | } 150 | 151 | if (firstNode != null) 152 | { 153 | if (firstNode.word.posTag.startsWith("W")) 154 | return true; 155 | } 156 | return false; 157 | } 158 | 159 | private boolean recognizeGeneralQuestion(DependencyTree tree) 160 | { 161 | DependencyTreeNode firstNode = null; 162 | for (DependencyTreeNode dtn : tree.nodesList) 163 | if (dtn.word.position == 1) 164 | { 165 | firstNode = dtn; 166 | break; 167 | } 168 | 169 | if (firstNode != null) 170 | { 171 | String dep = firstNode.dep_father2child; 172 | String pos = firstNode.word.posTag; 173 | String baseform = firstNode.word.baseForm; 174 | 175 | if ((baseform.equals("be")||baseform.equals("do")) && 176 | pos.startsWith("VB") && 177 | (dep.equals("root")||dep.equals("cop")||dep.startsWith("aux"))) 178 | return true; 179 | } 180 | return false; 181 | } 182 | 183 | public static String detectQuestionFocus(Sparql spq) { 184 | String ret = null; 185 | int posi = Integer.MAX_VALUE; 186 | for (Triple t : spq.tripleList) { 187 | 188 | if (!t.isSubjConstant()) { 189 | Word subj = t.getSubjectWord(); 190 | if (subj!=null && subj.position < posi) { 191 | posi = subj.position; 192 | ret = t.subject; 193 | } 194 | } 195 | if (!t.isObjConstant()) { 196 | Word obj = t.getObjectWord(); 197 | if (obj!=null && obj.position < posi) { 198 | posi = obj.position; 199 | ret = t.object; 200 | } 201 | } 202 | } 203 | if (ret != null) return ret.replace(' ', '_'); 204 | else return null; 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /src/application/GanswerHandler.java: -------------------------------------------------------------------------------- 1 | package application; 2 | import java.io.IOException; 3 | 4 | import javax.servlet.ServletException; 5 | import javax.servlet.http.HttpServletRequest; 6 | import javax.servlet.http.HttpServletResponse; 7 | 8 | import log.QueryLogger; 9 | 10 | import org.json.*; 11 | import org.eclipse.jetty.server.Request; 12 | import org.eclipse.jetty.server.handler.AbstractHandler; 13 | 14 | import rdf.Sparql; 15 | import qa.GAnswer; 16 | import qa.Globals; 17 | import qa.Matches; 18 | 19 | public class GanswerHandler extends AbstractHandler{ 20 | public static String errorHandle(String status,String message,String question,QueryLogger qlog){ 21 | JSONObject exobj = new JSONObject(); 22 | try { 23 | exobj.put("status", status); 24 | exobj.put("message", message); 25 | exobj.put("question", question); 26 | if(qlog!=null&&qlog.rankedSparqls!=null&&qlog.rankedSparqls.size()>0){ 27 | exobj.put("sparql", qlog.rankedSparqls.get(0).toStringForGStore2()); 28 | } 29 | } catch (Exception e1) { 30 | } 31 | return exobj.toString(); 32 | } 33 | 34 | public void handle(String target, Request baseRequest, HttpServletRequest request, HttpServletResponse response) 35 | throws IOException, ServletException { 36 | String question = ""; 37 | QueryLogger qlog = null; 38 | try{ 39 | response.setContentType("text/html;charset=utf-8"); 40 | response.setStatus(HttpServletResponse.SC_OK); 41 | //step1: parsing input json 42 | String data = request.getParameter("data"); 43 | data = data.replace("%22","\""); 44 | JSONObject jsonobj = new JSONObject(); 45 | int 
needAnswer = 0; 46 | int needSparql = 1; 47 | question = "Something wrong if you see this."; 48 | jsonobj = new JSONObject(data); 49 | question = jsonobj.getString("question"); 50 | if(jsonobj.isNull("maxAnswerNum")){ 51 | needAnswer = GanswerHttp.maxAnswerNum; 52 | } 53 | else{ 54 | needAnswer = jsonobj.getInt("maxAnswerNum"); 55 | } 56 | if(jsonobj.isNull("maxSparqlNum")){ 57 | needSparql = GanswerHttp.maxSparqlNum; 58 | }else{ 59 | needSparql = jsonobj.getInt("maxSparqlNum"); 60 | } 61 | Globals.MaxAnswerNum = needAnswer; 62 | 63 | //step2 run GAnswer Logic 64 | String input = question; 65 | GAnswer ga = new GAnswer(); 66 | qlog = ga.getSparqlList(input); 67 | if(qlog == null || qlog.rankedSparqls == null){ 68 | try { 69 | baseRequest.setHandled(true); 70 | response.getWriter().println(errorHandle("500","InvalidQuestionException: the question you input is invalid, please check",question,qlog)); 71 | } catch (Exception e1) { 72 | } 73 | return; 74 | } 75 | int idx; 76 | 77 | //step2 construct response 78 | JSONObject resobj = new JSONObject(); 79 | resobj.put("status", "200"); 80 | resobj.put("question",jsonobj.getString("question")); 81 | JSONObject tmpobj = new JSONObject(); 82 | if(needAnswer > 0){ 83 | if(qlog!=null && qlog.rankedSparqls.size()!=0){ 84 | Sparql curSpq = null; 85 | Matches m = null; 86 | for(idx = 1;idx<=Math.min(qlog.rankedSparqls.size(), 5);idx+=1){ 87 | curSpq = qlog.rankedSparqls.get(idx-1); 88 | if(curSpq.tripleList.size()>0&&curSpq.questionFocus!=null){ 89 | m = ga.getAnswerFromGStore2(curSpq); 90 | } 91 | if(m!=null&&m.answers!=null){ 92 | qlog.sparql = curSpq; 93 | qlog.match = m; 94 | break; 95 | } 96 | } 97 | if(m==null||m.answers==null){ 98 | curSpq = ga.getUntypedSparql(curSpq); 99 | if(curSpq!=null){ 100 | m = ga.getAnswerFromGStore2(curSpq); 101 | } 102 | if(m!=null&&m.answers!=null){ 103 | qlog.sparql = curSpq; 104 | qlog.match = m; 105 | } 106 | } 107 | if(qlog.match==null) 108 | qlog.match=new Matches(); 109 | if(qlog.sparql==null) 110 | qlog.sparql = qlog.rankedSparqls.get(0); 111 | qlog.reviseAnswers(); 112 | 113 | //adding variables to result json 114 | JSONArray vararr = new JSONArray(); 115 | for(String var : qlog.sparql.variables){ 116 | vararr.put(var); 117 | } 118 | resobj.put("vars", vararr); 119 | 120 | //adding answers to result json 121 | JSONArray ansobj = new JSONArray(); 122 | JSONObject bindingobj; 123 | System.out.println(qlog.match.answersNum); 124 | for(int i=0;i0){ 146 | JSONArray spqarr = new JSONArray(); 147 | spqarr.put(qlog.sparql.toStringForGStore2()); 148 | for(idx=0;idxMain_project_directory 29 | >>Ganswer.jar
30 | >>unzipped files from Ganswer.jar
31 | >>data 32 | >>>unzipped files from dbpedia16.rar
33 | - Run the jar file 34 | ```java 35 | java -jar Ganswer.jar 36 | ``` 37 | - Wait for the initialization procedure. When you see "Server Ready!", the initialization has succeeded and you can access the GAnswer service via HTTP requests. 38 | 39 | Information about the GAnswer HTTP API can be found in Chapter 2.1.1 of the help document. 40 | 41 | ### Use GAnswer via http request 42 | Here is an example of how to call the GAnswer service via an HTTP request. 43 | Having started GAnswerHttp, you can activate GAnswer by a URL as follows: 44 | http://[ip]:[port]/gSolve/?data={maxAnswerNum:1, maxSparqlNum:2, question:Who is the wife of Donald Trump?} 45 | 
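If you prefer to issue this request from code, the snippet below is a minimal sketch using only JDK classes (the class name `GSolveClient` and the `localhost:9999` endpoint are illustrative assumptions; 9999 is just the default port explained below):
```java
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

// Minimal sketch of a gSolve client; class name and endpoint are assumptions.
public class GSolveClient {
    public static void main(String[] args) throws Exception {
        String json = "{\"maxAnswerNum\":\"1\",\"maxSparqlNum\":\"2\","
                + "\"question\":\"Who is the wife of Donald Trump?\"}";
        // The server reads the json from the "data" URL parameter.
        String url = "http://localhost:9999/gSolve/?data="
                + URLEncoder.encode(json, "UTF-8");

        HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
        BufferedReader in = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "UTF-8"));
        StringBuilder body = new StringBuilder();
        for (String line; (line = in.readLine()) != null; )
            body.append(line);
        in.close();

        // The response json carries "sparql", "vars", "results" and "status".
        System.out.println(body);
    }
}
```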
46 | Here, [ip] and [port] are the IP address and port number of the GAnswerHttp service (the default port is 9999). Via the "data" parameter in the URL, you can send a JSON string to GAnswer. 47 | In this example, you are actually sending the following JSON data: 48 | ```json 49 | { 50 | "maxAnswerNum":"1", 51 | "maxSparqlNum":"2", 52 | "question":"Who is the wife of Donald Trump?" 53 | } 54 | ``` 55 | Here, maxAnswerNum and maxSparqlNum respectively limit the number of answers and SPARQL queries the system will return. Both of them are optional. 56 | If everything goes well, GAnswer will return a JSON string containing the system-generated SPARQL queries and the corresponding answers. 57 | ```json 58 | { 59 | "question":"Who is the wife of Donald Trump?", 60 | "vars":["?wife"], 61 | "sparql":["select DISTINCT ?wife where { \t\t?wife. } LIMIT 1","select DISTINCT ?wife where { ?wife\t\t. } LIMIT 1"], 62 | "results":{"bindings":[{"?wife":{"type":"uri","value":""}}]}, 63 | "status":"200" 64 | } 65 | ``` 66 | For more detail, please check Chapter 2.1.1 of the user guide. 67 | 68 | ### Run GAnswer in Eclipse 69 | If you would like to run GAnswer in Eclipse, you need to clone or download the source code and import the project into Eclipse. Afterwards, the jar files in the lib directory should be added to the Build Path. 70 | Due to their sizes, these jar files cannot be uploaded to GitHub. Therefore, you can download them [here](https://disk.pku.edu.cn:443/link/AD36D72C28B3A581379EE2748B1A79E7). The extract code is 64jd. You can also download the lib zip through [Google Drive](https://drive.google.com/file/d/1tEsi4pBOBHd2gmwVgIOgt-ypJZQH9G3S). 71 | Meanwhile, dbpedia16.rar is also needed. Please unzip it into a directory named data under the project main directory. The data path parameter can be found in qa.Globals.localPath. 72 | 73 | ### Notice 74 | To run GAnswer, you have to deal with multiple dependencies involving jar files, data files and external APIs. Related information is in Chapter 2.4 of the help document. 75 | Having generated SPARQL queries, the system by default accesses a remote gStore for answers, which may take extra time. Therefore, we strongly recommend deploying gStore on your own server for best performance. 76 | 77 | - Download the [DBpedia2016 triple file](https://pan.baidu.com/s/1l5Oui65sDn8QPYmA0rUvuA); the extract code is 89yy. 78 | - Deploy [gStore](http://gstore-pku.com) and use the DBpedia2016 triple file to build your own database. Note that the DBpedia 2016 triple file is about 9.9GB; the database construction needs more than 10GB of main memory and takes about 10 hours. 79 | 80 | ## Other Business 81 | 82 | You are welcome to use GAnswer and send us your advice or bug reports. 83 | 84 | If your advice or report is accepted, your contribution will be recorded in our help document. 85 | 86 | We have published several papers about GAnswer and the QA task, which you can find in Chapter 3.2 of the help document. 87 | 88 | ## How to make your own data available on gAnswer 89 | You may have your own set of triples and want to put them into gAnswer. Then you should generate a new set of fragments from your own triples. We have a [detailed tutorial](genrate_fragments/How_to_generate_fragments.md) to help you out. 
90 | 91 | -------------------------------------------------------------------------------- /genrate_fragments/How_to_generate_fragments.md: -------------------------------------------------------------------------------- 1 | ## How to generate fragments out of your own triples 2 | There are three kinds of fragments in gAnswer: entity fragments, predicate fragments and type fragments. They are information extracted from the triples that helps gAnswer improve its results. In this section we will show you how to generate your own fragments step by step with a simple example. 3 | 4 | ### Step 1: Clean the triple files 5 | Suppose we have a triple file containing only seven triples: 6 | ```java 7 | 8 | 9 | 10 | "Jeff" 11 | "Tom" 12 | 13 | 14 | 15 | ``` 16 | Generally speaking, each triple has three segments: subject, predicate and object. 17 | This is exactly the form of triples we need to generate fragments. However, sometimes the entities and predicates contain some extra information. Take the DBpedia dataset as an example. The following is the original form of a DBpedia triple: 18 | ```java 19 | . 20 | ``` 21 | As you can see, every entity and predicate is marked with a URI, but we don't need the prefix of the URIs. See step1_clean_triple.py; that is the code we use to clean DBpedia triples. 22 | Generally, all you need to do in this step is to make sure the entity and predicate names are clear enough to indicate their true meaning and contain no extra information. 23 | By the way, if you have more than one triple file, please combine them into one so that the following steps will be easier. 24 | 25 | ### Step 2: Remove duplicate triples 26 | One triple may occur more than once in the cleaned triple file, especially when you combine many triple files into one. 27 | gAnswer can handle duplicate triples, but they will degrade its performance. 28 | 29 | ### Step 3: Extract entity, predicate and type names for id allocation 30 | To save space, the fragment files are constructed not from the entity, predicate and type names themselves but from their ids. Therefore, we must extract every entity, predicate and type name out of the triple file and give each of them a unique id. In our example, the id files go like this: 31 | ```java 32 | //Entity ids 33 | 1 34 | 2 35 | 3 36 | 37 | //predicate ids 38 | 1 39 | 2 40 | 3 41 | 4 42 | 5 43 | 44 | //type ids 45 | 1 46 | 2 47 | ``` 48 | 49 | ### Step 4: Represent triples with ids 50 | For convenience, before we generate the fragments, we first replace all the name strings in the triple file with their corresponding ids. 51 | In our example, the new triple file looks like this: 52 | ```java 53 | 1 1 3 54 | 2 2 1 55 | 1 4 -1 56 | 2 4 -1 57 | 1 3 1 58 | 2 3 1 59 | 3 3 2 60 | ``` 61 | Notice that we use -1 to represent values that are neither entities nor types, such as numbers and literals. 62 | 63 | ### Step 5: Generate entity fragments 64 | Finally, we are going to generate entity fragments. Every entity has its own piece of fragment. A fragment records the edges related to the entity as well as its neighbor entities. First, let's clarify the idea of subject and object in a triple. A triple consists of three parts: subject, predicate and object. For example: 65 | ```java 66 | <studentA> <major> <computer_science> 67 | ``` 68 | Here *studentA* is the subject, *major* is the predicate and *computer_science* is the object. Basically, the first element is the subject, the second is the predicate and the third is the object. The object is sometimes neither an entity nor a type; values like numbers and strings can also be objects. 
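Before building fragments, it helps to see how one id-encoded line from Step 4 is read back. The following is a minimal sketch (a hypothetical helper, not one of the repository's generation scripts) that splits such a line into subject id, predicate id and object id, treating -1 as a literal object:
```java
// Parses one id-encoded triple line from Step 4, e.g. "1 4 -1".
public class TripleLine {
    public final int subjId, predId, objId;

    public TripleLine(String line) {
        String[] parts = line.trim().split("\\s+");
        subjId = Integer.parseInt(parts[0]); // subject entity id
        predId = Integer.parseInt(parts[1]); // predicate id
        objId  = Integer.parseInt(parts[2]); // entity/type id, or -1
    }

    // -1 marks objects that are neither entities nor types (numbers, strings).
    public boolean objectIsLiteral() {
        return objId == -1;
    }
}
```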
69 | 70 | We define 5 kinds of edges: 71 | 1. InEntEdge: The entity is the object of the edge and the subject is also an entity. 72 | 2. OutEntEdge: The entity is the subject of the edge and the object is also an entity. 73 | 3. InEdge: The entity is the object of the edge. 74 | 4. OutEdge: The entity is the subject of the edge. 75 | 5. typeEdge: The entity is the subject of the edge, whose predicate is *type* and whose object is a type. 76 | 77 | Therefore, the structure of a piece of entity fragment is as follows: 78 | ```java 79 | <entity id>	<InEntEdge list> | <OutEntEdge list> | <InEdge list> | <OutEdge list> | <Type list> 80 | ``` 81 | Between the entity id and the InEntEdge list, there should be a \t as divider. 82 | 83 | The InEntEdge list and OutEntEdge list should be: 84 | ```java 85 | <neighbor entity id>:<predicate id>;<predicate id>;...... , <neighbor entity id>:<predicate id>;...... 86 | ``` 87 | The InEdge, OutEdge and Type lists are similar but simpler: 88 | ```java 89 | <id>, <id>, ...... 90 | ``` 91 | 92 | Let's go back to our example. For entity *studentA*, the entity fragment should be: 93 | ```java 94 | 1 2:2;5 | 3:1 | 2 | 1,4 | 1 95 | ``` 96 | The id of *studentA* is 1, so the entity fragment starts with 1. Then we collect the InEntEdge, OutEntEdge, InEdge, OutEdge and Type lists one by one and append them to the entity fragment. 97 | 98 | ### Step 6: Generate type fragment 99 | Given a specific type, the type fragment contains three kinds of information: the predicate ids appearing in InEdges of entities of this type, the predicate ids appearing in OutEdges of entities of this type, and the ids of all entities of this type. The structure should be: 100 | ```java 101 | <type id>	<InEdge predicate list> | <OutEdge predicate list> | <entity list> 102 | ``` 103 | In our example, the type fragment of *Person* should be: 104 | ```java 105 | 1 2,5 | 1,4 | 1,2 106 | ``` 107 | 108 | ### Step 7: Generate predicate fragment 109 | Given a specific predicate, there will be more than one piece of predicate fragment. Every piece of predicate fragment comes from a single triple. We record the types that a predicate may accept as subject or object. Sometimes the object is not an entity, and we use *literal* to denote this situation. 110 | The structure of a piece of predicate fragment is: 111 | ```java 112 | [<subject type list>] <predicate id> [<object type list> or "literal"] 113 | ``` 114 | For predicate *friend_of*, the predicate fragment should be: 115 | ```java 116 | [1] 2 [1] 117 | ``` 118 | 119 | For predicate *name*, the predicate fragment should be: 120 | ```java 121 | [1] 4 literal 122 | ``` 123 | 124 | Please note that between the type lists, the predicate id and "literal", a \t should be the divider. 125 | 126 | ### Step 8: Rebuild the Lucene index for entity fragments and type short names 127 | This is the final step to make gAnswer run on your new data fragments. You can find the relevant code under src/lcn/BuildIndexForEntityFragments.java and src/lcn/BuildIndexForTypeShortName.java. All you need to do is to import the project into Eclipse, modify the file paths in the relevant code, and then run the main functions in src/lcn/BuildIndexForEntityFragments.java and src/lcn/BuildIndexForTypeShortName.java. 128 | -------------------------------------------------------------------------------- /src/rdf/Triple.java: -------------------------------------------------------------------------------- 1 | package rdf; 2 | 3 | import nlp.ds.Word; 4 | import qa.Globals; 5 | 6 | public class Triple implements Comparable<Triple>{ 7 | public String subject = null; // subject/object after disambiguation. 
8 | public String object = null; 9 | 10 | static public int TYPE_ROLE_ID = -5; 11 | static public int VAR_ROLE_ID = -2; 12 | static public int CAT_ROLE_ID = -8; // Category 13 | static public String VAR_NAME = "?xxx"; 14 | 15 | // subjId/objId: entity id | TYPE_ROLE_ID | VAR_ROLE_ID 16 | public int subjId = -1; 17 | public int objId = -1; 18 | public int predicateID = -1; 19 | public Word subjWord = null; // only used when semRltn == null 20 | public Word objWord = null; 21 | 22 | public SemanticRelation semRltn = null; 23 | public double score = 0; 24 | public boolean isSubjObjOrderSameWithSemRltn = true; 25 | public boolean isSubjObjOrderPrefered = false; 26 | 27 | public Word typeSubjectWord = null; // for "type" triples only 28 | 29 | public Triple (Triple t) { 30 | subject = t.subject; 31 | object = t.object; 32 | subjId = t.subjId; 33 | objId = t.objId; 34 | predicateID = t.predicateID; 35 | 36 | semRltn = t.semRltn; 37 | score = t.score; 38 | isSubjObjOrderSameWithSemRltn = t.isSubjObjOrderSameWithSemRltn; 39 | isSubjObjOrderPrefered = t.isSubjObjOrderPrefered; 40 | } 41 | 42 | // A final triple (subject/object order will not be changed); does not rely on a semantic relation (sr == null); from one word (type variable | embedded info) 43 | public Triple (int sId, String s, int p, int oId, String o, SemanticRelation sr, double sco) { 44 | subjId = sId; 45 | objId = oId; 46 | subject = s; 47 | predicateID = p; 48 | object = o; 49 | semRltn = sr; 50 | score = sco; 51 | } 52 | 53 | // A triple translated from a semantic relation (subject/object order can be changed later) 54 | public Triple (int sId, String s, int p, int oId, String o, SemanticRelation sr, double sco, boolean isSwap) { 55 | subjId = sId; 56 | objId = oId; 57 | subject = s; 58 | predicateID = p; 59 | object = o; 60 | semRltn = sr; 61 | score = sco; 62 | isSubjObjOrderSameWithSemRltn = isSwap; 63 | } 64 | 65 | // A final triple (subject/object order will not be changed); does not rely on a semantic relation (sr == null); from two words (implicit relations of modifier) 66 | public Triple(int sId, String s, int p, int oId, String o, SemanticRelation sr, double sco, Word subj, Word obj) { 67 | subjId = sId; 68 | objId = oId; 69 | subject = s; 70 | predicateID = p; 71 | object = o; 72 | semRltn = sr; 73 | score = sco; 74 | subjWord = subj; 75 | objWord = obj; 76 | } 77 | 78 | public Triple copy() { 79 | Triple t = new Triple(this); 80 | return t; 81 | } 82 | 83 | public Triple copySwap() { 84 | Triple t = new Triple(this); 85 | String temp; 86 | int tmpId; 87 | 88 | tmpId = t.subjId; 89 | t.subjId = t.objId; 90 | t.objId = tmpId; 91 | 92 | temp = t.subject; 93 | t.subject = t.object; 94 | t.object = temp; 95 | 96 | t.isSubjObjOrderSameWithSemRltn = !this.isSubjObjOrderSameWithSemRltn; 97 | t.isSubjObjOrderPrefered = !this.isSubjObjOrderPrefered; 98 | 99 | return t; 100 | } 101 | 102 | public void addScore(double s) { 103 | score += s; 104 | } 105 | 106 | public double getScore() { 107 | return score; 108 | } 109 | 110 | @Override 111 | public int hashCode() 112 | { 113 | return new Integer(subjId).hashCode() ^ new Integer(objId).hashCode() ^ new Integer(predicateID).hashCode(); 114 | } 115 | 116 | @Override 117 | public String toString() { 118 | return subjId+":<" + subject + "> <" + Globals.pd.getPredicateById(predicateID) + "> "+objId+":<" + object + ">" + " : " + score; 119 | } 120 | 121 | public String toStringForGStore() { 122 | StringBuilder sb = new StringBuilder(""); 123 | 124 | String _subject = subject; 125 | 
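// Variables keep their leading '?'; constants are wrapped in angle brackets, and blanks become '_' in the final replace.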
if(_subject.startsWith("?")) 126 | sb.append(_subject+"\t"); 127 | else 128 | sb.append("<" + _subject + ">\t"); 129 | 130 | sb.append("<" + Globals.pd.getPredicateById(predicateID) + ">\t"); 131 | 132 | String _object; 133 | if(predicateID == Globals.pd.typePredicateID && object.contains("|")) 134 | _object = object.substring(0, object.indexOf('|')); 135 | else 136 | _object = object; 137 | if(_object.startsWith("?")) 138 | sb.append(_object); 139 | else 140 | sb.append("<" + _object + ">"); 141 | 142 | return sb.toString().replace(' ', '_'); 143 | } 144 | 145 | public String toStringWithoutScore() { 146 | return "<" + subject + "> <" + Globals.pd.getPredicateById(predicateID) + "> <" + object + ">"; 147 | } 148 | 149 | public Word getSubjectWord () { 150 | if (predicateID == Globals.pd.typePredicateID) { 151 | return typeSubjectWord; 152 | } 153 | else if(semRltn == null) 154 | { 155 | return subjWord; 156 | } 157 | else { 158 | if (isSubjObjOrderSameWithSemRltn) return semRltn.arg1Word; 159 | else return semRltn.arg2Word; 160 | } 161 | 162 | } 163 | 164 | public Word getObjectWord () { 165 | if (predicateID == Globals.pd.typePredicateID) { 166 | return typeSubjectWord; 167 | } 168 | else if(semRltn == null) 169 | { 170 | return objWord; 171 | } 172 | else { 173 | if (isSubjObjOrderSameWithSemRltn) return semRltn.arg2Word; 174 | else return semRltn.arg1Word; 175 | } 176 | } 177 | 178 | public boolean isSubjConstant () { 179 | if (predicateID == Globals.pd.typePredicateID) { 180 | return !subject.startsWith("?"); 181 | } 182 | else { 183 | // Triple from semantic (obvious) relation 184 | if(semRltn != null) 185 | { 186 | if (isSubjObjOrderSameWithSemRltn) return semRltn.isArg1Constant; 187 | else return semRltn.isArg2Constant; 188 | } 189 | // Triple from implicit relation (no semantic relation), it is final triple 190 | else 191 | { 192 | if(subjId != Triple.VAR_ROLE_ID && subjId != Triple.TYPE_ROLE_ID) 193 | return true; 194 | else 195 | return false; 196 | } 197 | } 198 | } 199 | 200 | public boolean isObjConstant () { 201 | if (predicateID == Globals.pd.typePredicateID) { 202 | return !object.startsWith("?"); 203 | } 204 | else { 205 | if(semRltn != null) 206 | { 207 | if (isSubjObjOrderSameWithSemRltn) return semRltn.isArg2Constant; 208 | else return semRltn.isArg1Constant; 209 | } 210 | else 211 | { 212 | if(objId != Triple.VAR_ROLE_ID && objId != Triple.TYPE_ROLE_ID) 213 | return true; 214 | else 215 | return false; 216 | } 217 | } 218 | } 219 | 220 | public int compareTo(Triple o) 221 | { 222 | //Order: Type, Ent&Ent, Ent&Var, Var&Var 223 | if(this.predicateID == Globals.pd.typePredicateID) 224 | { 225 | if(o.predicateID == Globals.pd.typePredicateID) 226 | return 0; 227 | else 228 | return -1; 229 | } 230 | int cnt1 = 0, cnt2 = 0; 231 | if(!this.subject.startsWith("?")) 232 | cnt1++; 233 | if(!this.object.startsWith("?")) 234 | cnt1++; 235 | if(!o.subject.startsWith("?")) 236 | cnt2++; 237 | if(!o.object.startsWith("?")) 238 | cnt2++; 239 | 240 | if(cnt1 == cnt2) 241 | return 0; 242 | else if(cnt1 > cnt2) 243 | return -1; 244 | else 245 | return 1; 246 | } 247 | 248 | public void swapSubjObjOrder() { 249 | String temp = subject; 250 | int tmpId = subjId; 251 | subject = object; 252 | subjId = objId; 253 | object = temp; 254 | objId = tmpId; 255 | isSubjObjOrderSameWithSemRltn = !isSubjObjOrderSameWithSemRltn; 256 | } 257 | }; -------------------------------------------------------------------------------- /src/nlp/tool/CoreNLP.java: 
-------------------------------------------------------------------------------- 1 | package nlp.tool; 2 | 3 | import java.util.List; 4 | import java.util.Properties; 5 | 6 | import nlp.ds.Word; 7 | import edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation; 8 | import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; 9 | import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; 10 | import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; 11 | import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; 12 | import edu.stanford.nlp.ling.CoreLabel; 13 | import edu.stanford.nlp.pipeline.Annotation; 14 | import edu.stanford.nlp.pipeline.StanfordCoreNLP; 15 | import edu.stanford.nlp.trees.Tree; 16 | import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation; 17 | import edu.stanford.nlp.trees.semgraph.SemanticGraph; 18 | import edu.stanford.nlp.trees.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation; 19 | import edu.stanford.nlp.util.CoreMap; 20 | 21 | public class CoreNLP { 22 | 23 | // CoreNLP can also recognize TIME and NUMBER (see SUTime) 24 | private StanfordCoreNLP pipeline_lemma; 25 | 26 | public CoreNLP () { 27 | // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution 28 | /*Properties props_all = new Properties(); 29 | props_all.put("annotators", "tokenize, ssplit, pos, lemma, parse"); // full list: "tokenize, ssplit, pos, lemma, ner, parse, dcoref" 30 | pipeline_all = new StanfordCoreNLP(props_all);*/ 31 | 32 | Properties props_lemma = new Properties(); 33 | props_lemma.put("annotators", "tokenize, ssplit, pos, lemma"); 34 | pipeline_lemma = new StanfordCoreNLP(props_lemma); 35 | 36 | } 37 | 38 | // For more efficient usage, refer to "http://www.jarvana.com/jarvana/view/edu/stanford/nlp/stanford-corenlp/1.2.0/stanford-corenlp-1.2.0-javadoc.jar!/edu/stanford/nlp/process/Morphology.html" 39 | public String getBaseFormOfPattern (String text) { 40 | String ret = new String(""); 41 | 42 | // create an empty Annotation just with the given text 43 | Annotation document = new Annotation(text); 44 | // run all Annotators on this text 45 | pipeline_lemma.annotate(document); 46 | 47 | 48 | // these are all the sentences in this document 49 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types 50 | List sentences = document.get(SentencesAnnotation.class); 51 | 52 | int count = 0; 53 | for(CoreMap sentence: sentences) { 54 | // traversing the words in the current sentence 55 | // a CoreLabel is a CoreMap with additional token-specific methods 56 | for (CoreLabel token: sentence.get(TokensAnnotation.class)) { 57 | // this is the base form (lemma) of the token 58 | String lemma = token.getString(LemmaAnnotation.class); 59 | ret += lemma; 60 | ret += " "; 61 | } 62 | count ++; 63 | if (count % 100 == 0) { 64 | System.out.println(count); 65 | } 66 | } 67 | 68 | return ret.substring(0, ret.length()-1); 69 | } 70 | 71 | public SemanticGraph getBasicDependencies (String s) { 72 | // create an empty Annotation just with the given text 73 | Annotation document = new Annotation(s); 74 | 75 | // run all Annotators on this text 76 | pipeline_lemma.annotate(document); 77 | 78 | // these are all the sentences in this document 79 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types 80 | List sentences = document.get(SentencesAnnotation.class); 81 | 82 | for(CoreMap sentence: sentences) { 83 | // this is the 
Stanford dependency graph of the current sentence 84 | SemanticGraph dependencies = sentence.get(BasicDependenciesAnnotation.class); 85 | return dependencies; 86 | } 87 | 88 | return null; 89 | } 90 | 91 | public Tree getParseTree (String text) { 92 | // create an empty Annotation just with the given text 93 | Annotation document = new Annotation(text); 94 | 95 | // run all Annotators on this text 96 | pipeline_lemma.annotate(document); 97 | 98 | // these are all the sentences in this document 99 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types 100 | List sentences = document.get(SentencesAnnotation.class); 101 | 102 | for(CoreMap sentence: sentences) { 103 | // this is the parse tree of the current sentence 104 | return sentence.get(TreeAnnotation.class); 105 | } 106 | 107 | return null; 108 | } 109 | 110 | /** 111 | * How to use: 112 | * for (CoreLabel token : sentence.get(TokensAnnotation.class)) { 113 | * // this is the text of the token 114 | * String word = token.get(TextAnnotation.class); 115 | * // this is the POS tag of the token 116 | * String pos = token.get(PartOfSpeechAnnotation.class); 117 | * } 118 | * @param s 119 | * @return 120 | */ 121 | public CoreMap getPOS (String s) { 122 | // create an empty Annotation just with the given text 123 | Annotation document = new Annotation(s); 124 | 125 | // run all Annotators on this text 126 | pipeline_lemma.annotate(document); 127 | 128 | // these are all the sentences in this document 129 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types 130 | List sentences = document.get(SentencesAnnotation.class); 131 | 132 | for(CoreMap sentence: sentences) { 133 | // this is the sentence with POS Tags 134 | return sentence; 135 | } 136 | 137 | return null; 138 | } 139 | 140 | public Word[] getTaggedWords (String sentence) { 141 | CoreMap taggedSentence = getPOS(sentence); 142 | Word[] ret = new Word[taggedSentence.get(TokensAnnotation.class).size()]; 143 | int count = 0; 144 | for (CoreLabel token : taggedSentence.get(TokensAnnotation.class)) { 145 | // this is the text of the token 146 | String word = token.get(TextAnnotation.class); 147 | // this is the POS tag of the token 148 | String pos = token.get(PartOfSpeechAnnotation.class); 149 | //System.out.println(word+"["+pos+"]"); 150 | ret[count] = new Word(getBaseFormOfPattern(word.toLowerCase()), word, pos, count+1); 151 | count ++; 152 | } 153 | return ret; 154 | } 155 | 156 | /*public void demo () { 157 | // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution 158 | Properties props = new Properties(); 159 | props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); 160 | StanfordCoreNLP pipeline = new StanfordCoreNLP(props); 161 | 162 | // read some text in the text variable 163 | String text = ... // Add your text here! 
164 | 
165 |         // create an empty Annotation just with the given text
166 |         Annotation document = new Annotation(text);
167 | 
168 |         // run all Annotators on this text
169 |         pipeline.annotate(document);
170 | 
171 |         // these are all the sentences in this document
172 |         // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
173 |         List<CoreMap> sentences = document.get(SentencesAnnotation.class);
174 | 
175 |         for(CoreMap sentence: sentences) {
176 |             // traversing the words in the current sentence
177 |             // a CoreLabel is a CoreMap with additional token-specific methods
178 |             for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
179 |                 // this is the text of the token
180 |                 String word = token.get(TextAnnotation.class);
181 |                 // this is the POS tag of the token
182 |                 String pos = token.get(PartOfSpeechAnnotation.class);
183 |                 // this is the NER label of the token
184 |                 String ne = token.get(NamedEntityTagAnnotation.class);
185 |             }
186 | 
187 |             // this is the parse tree of the current sentence
188 |             Tree tree = sentence.get(TreeAnnotation.class);
189 | 
190 |             // this is the Stanford dependency graph of the current sentence
191 |             SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
192 |         }
193 | 
194 |         // This is the coreference link graph.
195 |         // Each chain stores a set of mentions that link to each other,
196 |         // along with a method for getting the most representative mention.
197 |         // Both sentence and token offsets start at 1!
198 |         Map<Integer, CorefChain> graph =
199 |             document.get(CorefChainAnnotation.class);
200 |     }*/
201 | }
--------------------------------------------------------------------------------
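A minimal usage sketch for the class above. It is not part of the original file; it assumes the Stanford CoreNLP models bundled with gAnswer are on the classpath, and it uses only the Word fields (baseForm, originalForm) that appear elsewhere in this repository:

    // Construct once and reuse: building the pipeline loads the tagger models.
    CoreNLP coreNLP = new CoreNLP();

    // Lemmatization, e.g. "Who are the daughters ..." -> "who be the daughter ..."
    System.out.println(coreNLP.getBaseFormOfPattern("Who are the daughters of Ingrid Bergman?"));

    // POS tagging; Word positions start at 1 (see getTaggedWords above).
    Word[] words = coreNLP.getTaggedWords("Who are the daughters of Ingrid Bergman?");
    for (Word w : words)
        System.out.println(w.originalForm + " / " + w.baseForm);
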
/src/addition/AddtionalFix.java:
--------------------------------------------------------------------------------
1 | package addition;
2 | 
3 | import java.util.ArrayList;
4 | import java.util.HashMap;
5 | 
6 | import paradict.PredicateIDAndSupport;
7 | import log.QueryLogger;
8 | import nlp.ds.Word;
9 | import nlp.ds.Sentence.SentenceType;
10 | import qa.Globals;
11 | import rdf.SemanticUnit;
12 | import rdf.Sparql;
13 | import rdf.Sparql.QueryType;
14 | import rdf.Triple;
15 | 
16 | 
17 | public class AddtionalFix
18 | {
19 |     public HashMap<String, String> pattern2category = new HashMap<String, String>();
20 | 
21 |     public AddtionalFix()
22 |     {
23 |         // Some category mappings for DBpedia (key: pattern in base form, value: category name); try automatic linking methods later.
24 |         pattern2category.put("gangster_from_the_prohibition_era", "Prohibition-era_gangsters");
25 |         pattern2category.put("seven_wonder_of_the_ancient_world", "Seven_Wonders_of_the_Ancient_World");
26 |         pattern2category.put("three_ship_use_by_columbus", "Christopher_Columbus");
27 |         pattern2category.put("13_british_colony", "Thirteen_Colonies");
28 |     }
29 | 
30 |     public void process(QueryLogger qlog)
31 |     {
32 |         fixCategory(qlog);
33 |         oneTriple(qlog);
34 |         oneNode(qlog);
35 | 
36 |         // aggregation
37 |         AggregationRecognition ar = new AggregationRecognition();
38 |         ar.recognize(qlog);
39 | 
40 |         // query type
41 |         decideQueryType(qlog);
42 |     }
43 | 
44 |     public void decideQueryType(QueryLogger qlog)
45 |     {
46 |         if(qlog.s.sentenceType == SentenceType.GeneralQuestion)
47 |             for(Sparql spq: qlog.rankedSparqls)
48 |                 spq.queryType = QueryType.Ask;
49 |     }
50 | 
51 |     public void fixCategory(QueryLogger qlog)
52 |     {
53 |         if(qlog == null || qlog.semanticUnitList == null)
54 |             return;
55 | 
56 |         String var = null, category = null;
57 |         for(SemanticUnit su: qlog.semanticUnitList)
58 |         {
59 |             if(su.centerWord.mayCategory)
60 |             {
61 |                 var = "?"+su.centerWord.originalForm;
62 |                 category = su.centerWord.category;
63 |             }
64 |         }
65 | 
66 |         if(category != null && var != null)
67 |             for(Sparql spq: qlog.rankedSparqls)
68 |             {
69 |                 boolean occured = false;
70 |                 for(Triple tri: spq.tripleList)
71 |                 {
72 |                     if(tri.subject.equals(var))
73 |                     {
74 |                         occured = true;
75 |                         break;
76 |                     }
77 |                 }
78 |                 String oName = category;
79 |                 String pName = "subject";
80 |                 int pid = Globals.pd.predicate_2_id.get(pName);
81 |                 Triple triple = new Triple(Triple.VAR_ROLE_ID, var, pid, Triple.CAT_ROLE_ID, oName, null, 100);
82 |                 spq.addTriple(triple);
83 |             }
84 |     }
85 | 
86 |     /* Recognize one-node queries.
87 |      * Two cases: (1) special questions and imperative sentences; (2) general (yes/no) questions.
88 |      * 1-1: how many [], highest [] ... | a single variable; add a constraint (aggregation)
89 |      * 1-2: What is backgammon? | What is a bipolar syndrome? | search an entity (return itself or its type/description ...)
90 |      * 1-3: Give me all Seven Wonders of the Ancient World. | Notice, "Seven Wonders of the Ancient World" should be recognized as an ENT before. (in fact it is a CATEGORY in DBpedia)
91 |      * 2-1: Are there any [castles_in_the_United_States](yago:type)
92 |      * 2-2: Was Sigmund Freud married? | lacks a variable node.
93 |      * 2-3: Are penguins endangered? | no suitable relation match; needs a transition.
94 |      */
95 |     public void oneNode(QueryLogger qlog)
96 |     {
97 |         if(qlog == null || qlog.semanticUnitList == null || qlog.semanticUnitList.size()>1)
98 |             return;
99 | 
100 |         Word target = qlog.target;
101 |         Word[] words = qlog.s.words;
102 |         if(qlog.s.sentenceType != SentenceType.GeneralQuestion)
103 |         {
104 |             //1-1: how many [type] are there | List all [type]
105 |             if(target.mayType && target.tmList != null)
106 |             {
107 |                 String subName = "?"+target.originalForm;
108 |                 String typeName = target.tmList.get(0).typeName;
109 |                 Triple triple = new Triple(Triple.VAR_ROLE_ID, subName, Globals.pd.typePredicateID, Triple.TYPE_ROLE_ID, typeName, null, 100);
110 |                 Sparql sparql = new Sparql();
111 |                 sparql.addTriple(triple);
112 |                 qlog.rankedSparqls.add(sparql);
113 |             }
114 |             //1-2: What is [ent]?
115 |             else if(target.mayEnt && target.emList != null)
116 |             {
117 |                 if(words.length >= 3 && words[0].baseForm.equals("what") && words[1].baseForm.equals("be"))
118 |                 {
119 |                     int eid = target.emList.get(0).entityID;
120 |                     String subName = target.emList.get(0).entityName;
121 |                     Triple triple = new Triple(eid, subName, Globals.pd.typePredicateID, Triple.VAR_ROLE_ID, "?"+target.originalForm, null, target.emList.get(0).score);
122 |                     Sparql sparql = new Sparql();
123 |                     sparql.addTriple(triple);
124 |                     qlog.rankedSparqls.add(sparql);
125 |                 }
126 |             }
127 |             //1-3: Give me all Seven Wonders of the Ancient World.
128 |             else if(target.mayCategory && target.category != null)
129 |             {
130 |                 String oName = target.category;
131 |                 String pName = "subject";
132 |                 int pid = Globals.pd.predicate_2_id.get(pName);
133 |                 Triple triple = new Triple(Triple.VAR_ROLE_ID, "?"+target.originalForm, pid, Triple.CAT_ROLE_ID, oName, null, 100);
134 |                 Sparql sparql = new Sparql();
135 |                 sparql.addTriple(triple);
136 |                 qlog.rankedSparqls.add(sparql);
137 |             }
138 |         }
139 |         else
140 |         {
141 |             if(target.mayEnt && target.emList != null)
142 |             {
143 |                 //2-2: Was Sigmund Freud married?
144 |                 String relMention = "";
145 |                 for(Word word: words)
146 |                     if(word != target && !word.baseForm.equals(".") && !word.baseForm.equals("?"))
147 |                         relMention += word.baseForm+" ";
148 |                 if(relMention.length() > 1)
149 |                     relMention = relMention.substring(0, relMention.length()-1);
150 | 
151 |                 ArrayList<PredicateIDAndSupport> pmList = null;
152 |                 if(Globals.pd.nlPattern_2_predicateList.containsKey(relMention))
153 |                     pmList = Globals.pd.nlPattern_2_predicateList.get(relMention);
154 | 
155 |                 if(pmList != null && pmList.size() > 0)
156 |                 {
157 |                     int pid = pmList.get(0).predicateID;
158 |                     int eid = target.emList.get(0).entityID;
159 |                     String subName = target.emList.get(0).entityName;
160 |                     Triple triple = new Triple(eid, subName, pid, Triple.VAR_ROLE_ID, "?x", null, 100);
161 |                     Sparql sparql = new Sparql();
162 |                     sparql.addTriple(triple);
163 |                     qlog.rankedSparqls.add(sparql);
164 |                 }
165 | 
166 |                 //2-3: Are penguins endangered?
167 |                 else
168 |                 {
169 |                     if(target.position < words.length && pattern2category.containsKey(words[target.position].baseForm))
170 |                     {
171 |                         String oName = pattern2category.get(words[target.position].baseForm);
172 |                         String pName = "subject";
173 |                         int pid = Globals.pd.predicate_2_id.get(pName);
174 |                         int eid = target.emList.get(0).entityID;
175 |                         String subName = target.emList.get(0).entityName;
176 |                         Triple triple = new Triple(eid, subName, pid, Triple.CAT_ROLE_ID, oName, null, 100);
177 |                         Sparql sparql = new Sparql();
178 |                         sparql.addTriple(triple);
179 |                         qlog.rankedSparqls.add(sparql);
180 |                     }
181 |                 }
182 |             }
183 |             //2-1: Are there any [castles_in_the_United_States](yago:type)
184 |             else if(target.mayType && target.tmList != null)
185 |             {
186 |                 String typeName = target.tmList.get(0).typeName;
187 |                 String subName = "?" + target.originalForm;
188 |                 //System.out.println("typeName="+typeName+" subName="+subName);
189 |                 Triple triple = new Triple(Triple.VAR_ROLE_ID, subName, Globals.pd.typePredicateID, Triple.TYPE_ROLE_ID, typeName, null, 100);
190 |                 Sparql sparql = new Sparql();
191 |                 sparql.addTriple(triple);
192 |                 qlog.rankedSparqls.add(sparql);
193 |             }
194 |         }
195 |     }
196 | 
197 |     /*
198 |      * One triple recognized but no suitable relation.
199 |      */
200 |     public void oneTriple (QueryLogger qlog)
201 |     {
202 |         if(qlog == null || qlog.semanticUnitList == null)
203 |             return;
204 | 
205 |         if(qlog.s.sentenceType == SentenceType.SpecialQuestion)
206 |         {
207 |             Word[] words = qlog.s.words;
208 |             if(qlog.semanticUnitList.size() == 2)
209 |             {
210 |                 Word entWord = null, whWord = null;
211 |                 for(int i=0; i<qlog.semanticUnitList.size(); i++)
212 |                 {
213 |                     Word centerWord = qlog.semanticUnitList.get(i).centerWord;
214 |                     if(centerWord.mayEnt && centerWord.emList != null)
215 |                         entWord = centerWord;
216 |                     else
217 |                         whWord = centerWord;
218 |                 }
219 |                 if(entWord != null && whWord != null && words.length >= 3 && words[0].baseForm.equals("what") && words[1].baseForm.equals("be"))
220 |                 {
221 |                     int eid = entWord.emList.get(0).entityID;
222 |                     String subName = entWord.emList.get(0).entityName;
223 |                     Triple triple = new Triple(eid, subName, Globals.pd.typePredicateID, Triple.VAR_ROLE_ID, "?"+whWord.originalForm, null, entWord.emList.get(0).score);
224 |                     Sparql sparql = new Sparql();
225 |                     sparql.addTriple(triple);
226 |                     qlog.rankedSparqls.add(sparql);
227 |                 }
228 |             }
229 |         }
230 |     }
231 | }
232 | 
233 | 
--------------------------------------------------------------------------------
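To make case 1-3 above concrete: the one-triple SPARQL built for "Give me all Seven Wonders of the Ancient World." can be written out by hand. This is an illustrative sketch, not project code; it assumes Globals.init() has already loaded the predicate dictionary, and the variable name ?wonder is made up:

    // oneNode(), case 1-3: a CATEGORY target becomes <?var, subject, category>.
    int pid = Globals.pd.predicate_2_id.get("subject");
    Triple triple = new Triple(Triple.VAR_ROLE_ID, "?wonder", pid,
            Triple.CAT_ROLE_ID, "Seven_Wonders_of_the_Ancient_World", null, 100);

    Sparql sparql = new Sparql();
    sparql.addTriple(triple);   // a one-triple SPARQL answering the imperative sentence
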
/src/fgmt/EntityFragment.java:
--------------------------------------------------------------------------------
1 | package fgmt;
2 | 
3 | import java.io.IOException;
4 | import java.util.ArrayList;
5 | import java.util.Collections;
6 | import java.util.HashMap;
7 | import java.util.HashSet;
8 | 
9 | import rdf.EntityMapping;
10 | import lcn.EntityFragmentFields;
11 | import lcn.EntityNameAndScore;
12 | import lcn.SearchInEntityFragments;
13 | 
14 | public class EntityFragment extends Fragment {
15 | 
16 |     public int eId;
17 |     public HashSet<Integer> inEdges = new HashSet<Integer>();
18 |     public HashSet<Integer> outEdges = new HashSet<Integer>();
19 |     public HashSet<Integer> types = new HashSet<Integer>();
20 | 
21 |     // In/out entities and the connecting edges. E.g., for a triple <eId, p, oId>, outEntMap of eId contains <oId, [p, ...]>.
22 |     public HashMap<Integer, ArrayList<Integer>> inEntMap = new HashMap<Integer, ArrayList<Integer>>(); // note: the input file should contain no redundant triples.
23 |     public HashMap<Integer, ArrayList<Integer>> outEntMap = new HashMap<Integer, ArrayList<Integer>>();
24 | 
25 |     static double thres1 = 0.4;
26 |     static double thres2 = 0.8;
27 |     static int thres3 = 3;
28 |     static int k = 50;
29 | 
30 |     /**
31 |      * mention to entity using the Lucene index.
32 |      * 
33 |      * rule:
34 |      * select the top-k results for each phrase.
35 |      * (1) if the current lowest score < thres1, drop those with score < thres1.
36 |      * (2) if the current lowest score > thres2, add those with score > thres2.
37 |      * 
38 |      * exact match:
39 |      * (1) Lucene score = 1.
40 |      * (2) string match (lowercase): edit distance <= thres3.
41 |      * 
42 |      * score:
43 |      * use the Lucene score directly.
44 |      * 
45 |      * @param phrase
46 |      * @return
47 |      */
48 |     public static HashMap<Integer, Double> getCandEntityNames2(String phrase) {
49 | 
50 |         HashMap<Integer, Double> ret = new HashMap<Integer, Double>();
51 |         ArrayList<EntityNameAndScore> list1 = getCandEntityNames_subject(phrase, thres1, thres2, k);
52 | 
53 |         if(list1 == null)
54 |             return ret;
55 | 
56 |         int iter_size = 0;
57 |         if (list1.size() <= k) {
58 |             iter_size = list1.size();
59 |         }
60 |         else if (list1.size() > k) {
61 |             if (list1.get(k-1).score >= thres2) {
62 |                 iter_size = list1.size();
63 |             }
64 |             else {
65 |                 iter_size = k;
66 |             }
67 |         }
68 |         for(int i = 0; i < iter_size; i ++) {
69 |             if (i < k) {
70 |                 ret.put(list1.get(i).entityID, getScore(phrase, list1.get(i).entityName, list1.get(i).score));
71 |             }
72 |             else if (list1.get(i).score >= thres2) {
73 |                 ret.put(list1.get(i).entityID, getScore(phrase, list1.get(i).entityName, list1.get(i).score));
74 |             }
75 |             else {
76 |                 break;
77 |             }
78 |         }
79 | 
80 |         return ret;
81 |     }
82 | 
83 |     public static ArrayList<EntityMapping> getEntityMappingList (String n)
84 |     {
85 |         HashMap<Integer, Double> map = getCandEntityNames2(n);
86 |         ArrayList<EntityMapping> ret = new ArrayList<EntityMapping>();
87 |         for (int eid : map.keySet())
88 |         {
89 |             String s = EntityFragmentFields.entityId2Name.get(eid);
90 |             ret.add(new EntityMapping(eid, s, map.get(eid)));
91 |         }
92 |         Collections.sort(ret);
93 |         return ret;
94 |     }
95 | 
96 |     public static double getScore (String s1, String s2, double luceneScore) {
97 |         double ret = luceneScore*100.0/(Math.log(calEditDistance(s1, s2)*1.5+1)+1);
98 |         return ret;
99 |     }
100 | 
101 |     /**
102 |      * Edit distance (all lowercase)
103 |      * @param s1
104 |      * @param s2
105 |      * @return
106 |      */
107 |     public static int calEditDistance (String s1, String s2) {
108 |         s1 = s1.toLowerCase();
109 |         s2 = s2.toLowerCase();
110 | 
111 |         int d[][];
112 |         int n = s1.length();
113 |         int m = s2.length();
114 |         int i, j, temp;
115 |         char ch1, ch2;
116 | 
117 |         if(n == 0) {
118 |             return m;
119 |         }
120 |         if(m == 0) {
121 |             return n;
122 |         }
123 | 
124 |         d = new int[n+1][m+1];
125 |         for(i=0; i<=n; i++) {
126 |             d[i][0] = i;
127 |         }
128 |         for(j=0; j<=m; j++) {
129 |             d[0][j] = j;
130 |         }
131 | 
132 |         for(i=1; i<=n; i++) {
133 |             ch1 = s1.charAt(i-1);
134 |             for(j=1; j<=m; j++) {
135 |                 ch2 = s2.charAt(j-1);
136 |                 if(ch1 == ch2) {
137 |                     temp = 0;
138 |                 } else {
139 |                     temp = 1;
140 |                 }
141 |                 d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+temp);
142 |             }
143 |         }
144 | 
145 |         return d[n][m];
146 |     }
147 | 
148 |     private static int min(int a, int b, int c) {
149 |         int ab = a<b ? a : b;
150 |         return ab<c ? ab : c;
151 |     }
152 | 
153 |     public static ArrayList<EntityNameAndScore> getCandEntityNames_subject(String phrase, double thres1, double thres2, int k) {
154 |         SearchInEntityFragments sf = new SearchInEntityFragments();
155 |         //System.out.println("EntityFragment.getCandEntityNames_subject() ...");
156 | 
157 |         ArrayList<EntityNameAndScore> ret_sf = null;
158 |         try {
159 |             ret_sf = sf.searchName(phrase, thres1, thres2, k);
160 |         } catch (IOException e) {
161 |             //e.printStackTrace();
162 |             System.err.println("Error reading the lcn (Lucene) index.");
163 |         }
164 | 
165 |         return ret_sf;
166 |     }
167 | 
168 |     public static EntityFragment getEntityFragmentByEntityId(Integer entityId)
169 |     {
170 |         if(!EntityFragmentFields.entityFragmentString.containsKey(entityId))
171 |             return null;
172 |         String fgmt = EntityFragmentFields.entityFragmentString.get(entityId);
173 |         EntityFragment ef = new EntityFragment(entityId, fgmt);
174 |         return ef;
175 |     }
176 | 
177 |     public static String getEntityFgmtStringByName(String entityName)
178 |     {
179 |         int id = EntityFragmentFields.entityName2Id.get(entityName);
180 |         String fgmt = EntityFragmentFields.entityFragmentString.get(id);
181 |         return fgmt;
182 |     }
183 | 
184 |     public EntityFragment(int eid, String fgmt)
185 |     {
186 |         eId = eid;
187 |         fragmentType = typeEnum.ENTITY_FRAGMENT;
188 | 
189 |         // eg, for entity 11: "3961112:2881;410;,4641020:2330;,|...|...|...|..." — five '|'-separated fields (inEntMap, outEntMap, inEdges, outEdges, types); each ent-map entry is "entId:edgeId;edgeId;,".
190 |         fgmt = fgmt.replace('|', '#');
191 |         String[] fields = fgmt.split("#");
192 | 
193 |         if(fields.length > 0 && fields[0].length() > 0)
194 |         {
195 |             String[] entEdgesArr = fields[0].split(",");
196 |             for(int i = 0; i < entEdgesArr.length; i ++)
197 |             {
198 |                 String[] nums = entEdgesArr[i].split(":");
199 |                 if(nums.length != 2)
200 |                     continue;
201 |                 int inEntId = Integer.valueOf(nums[0]);
202 |                 String[] inEdgeArr = nums[1].split(";");
203 |                 ArrayList<Integer> inEdgeList = new ArrayList<Integer>();
204 |                 for(String inEdge: inEdgeArr)
205 |                 {
206 |                     inEdgeList.add(Integer.valueOf(inEdge));
207 |                 }
208 |                 if(inEdgeList.size()>0)
209 |                     inEntMap.put(inEntId, inEdgeList);
210 |             }
211 |         }
212 | 
213 |         if(fields.length > 1 && fields[1].length() > 0)
214 |         {
215 |             String[] entEdgesArr = fields[1].split(",");
216 |             for(int i = 0; i < entEdgesArr.length; i ++)
217 |             {
218 |                 String[] nums = entEdgesArr[i].split(":");
219 |                 if(nums.length != 2)
220 |                     continue;
221 |                 int outEntId = Integer.valueOf(nums[0]);
222 |                 String[] outEdges = nums[1].split(";");
223 |                 ArrayList<Integer> outEdgeList = new ArrayList<Integer>();
224 |                 for(String outEdge: outEdges)
225 |                 {
226 |                     outEdgeList.add(Integer.valueOf(outEdge));
227 |                 }
228 |                 if(outEdgeList.size()>0)
229 |                     outEntMap.put(outEntId, outEdgeList);
230 |             }
231 |         }
232 | 
233 |         if(fields.length > 2 && fields[2].length() > 0) {
234 |             String[] nums = fields[2].split(",");
235 |             for(int i = 0; i < nums.length; i ++) {
236 |                 if (nums[i].length() > 0) {
237 |                     inEdges.add(Integer.parseInt(nums[i]));
238 |                 }
239 |             }
240 |         }
241 |         if(fields.length > 3 && fields[3].length() > 0) {
242 |             String[] nums = fields[3].split(",");
243 |             for(int i = 0; i < nums.length; i ++) {
244 |                 if (nums[i].length() > 0) {
245 |                     outEdges.add(Integer.parseInt(nums[i]));
246 |                 }
247 |             }
248 |         }
249 |         if(fields.length > 4 && fields[4].length() > 0) {
250 |             String[] nums = fields[4].split(",");
251 |             for(int i = 0; i < nums.length; i ++) {
252 |                 if (nums[i].length() > 0) {
253 |                     types.add(Integer.parseInt(nums[i]));
254 |                 }
255 |             }
256 |         }
257 |     }
258 | 
259 |     @Override
260 |     public String toString()
261 |     {
262 |         StringBuilder ret = new StringBuilder();
263 |         for(Integer inEnt: inEntMap.keySet())
264 |         {
265 |             ArrayList<Integer> inEdgeList = inEntMap.get(inEnt);
266 |             if(inEdgeList==null || inEdgeList.size()==0)
267 |                 continue;
268 |             ret.append(inEnt+":");
269 |             for(int inEdge: inEdgeList)
270 |                 ret.append(inEdge+";");
271 |             ret.append(",");
272 |         }
273 |         ret.append('|');
274 |         for(Integer outEnt: outEntMap.keySet())
275 |         {
276 |             ArrayList<Integer> outEdgeList = outEntMap.get(outEnt);
277 |             if(outEdgeList==null || outEdgeList.size()==0)
278 |                 continue;
279 |             ret.append(outEnt+":");
280 |             for(int outEdge: outEdgeList)
281 |                 ret.append(outEdge+";");
282 |             ret.append(",");
283 |         }
284 |         ret.append('|');
285 |         for(Integer p : inEdges) {
286 |             ret.append(p);
287 |             ret.append(',');
288 |         }
289 |         ret.append('|');
290 |         for(Integer p : outEdges) {
291 |             ret.append(p);
292 |             ret.append(',');
293 |         }
294 |         ret.append('|');
295 |         for(Integer t : types) {
296 |             ret.append(t);
297 |             ret.append(',');
298 |         }
299 |         return ret.toString();
300 |     }
301 | }
--------------------------------------------------------------------------------
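Both the fragment encoding and the linking score above can be checked in isolation. The constructor parses five '|'-separated fields (in-neighbors, out-neighbors, in-edges, out-edges, types), and getScore(s1, s2, lucene) = lucene*100/(ln(ed*1.5+1)+1), so an exact (edit-distance-0) match keeps the Lucene score scaled by 100. A sketch with made-up ids, not a real fragment from the index:

    // Entity 11: in-neighbor 3961112 via edges 2881 and 410; out-neighbor 4641020
    // via edge 2330; edge sets {2881,410} / {2330}; one type id 5.
    EntityFragment ef = new EntityFragment(11, "3961112:2881;410;,|4641020:2330;,|2881,410,|2330,|5,");
    System.out.println(ef);   // re-serializes the same five fields

    // Exact match: calEditDistance("berlin","Berlin") == 0 (it lowercases),
    // so the score is 1.0*100/(ln(1)+1) = 100.0.
    System.out.println(EntityFragment.getScore("berlin", "Berlin", 1.0));
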
/src/qa/GAnswer.java:
--------------------------------------------------------------------------------
1 | package qa;
2 | 
3 | import java.util.ArrayList;
4 | import java.util.Collections;
5 | import java.util.List;
6 | 
7 | import jgsc.GstoreConnector;
8 | import log.QueryLogger;
9 | import nlp.ds.Sentence;
10 | import nlp.ds.Sentence.SentenceType;
11 | import qa.parsing.QuestionParsing;
12 | import qa.parsing.BuildQueryGraph;
13 | import rdf.Sparql;
14 | import utils.FileUtil;
15 | import addition.AddtionalFix;
16 | import qa.Globals;
17 | 
18 | public class GAnswer {
19 | 
20 |     public static final int MAX_SPQ_NUM = 3;
21 | 
22 |     public static void init() {
23 |         System.out.println("gAnswer2 init ...");
24 | 
25 |         Globals.init();
26 | 
27 |         System.out.println("gAnswer2 init ... ok!");
28 |     }
29 | 
30 |     public QueryLogger getSparqlList(String input)
31 |     {
32 |         QueryLogger qlog = null;
33 |         try
34 |         {
35 |             if (input.length() <= 5)
36 |                 return null;
37 | 
38 |             System.out.println("[Input:] "+input);
39 | 
40 |             // step 0: Node (entity & type & literal) Recognition
41 |             long t0 = System.currentTimeMillis(), t, NRtime;
42 |             Query query = new Query(input);
43 |             qlog = new QueryLogger(query);
44 |             ArrayList<Sparql> rankedSparqls = new ArrayList<Sparql>();
45 |             NRtime = System.currentTimeMillis()-t0;
46 |             System.out.println("step0 [Node Recognition] : "+ NRtime +"ms");
47 | 
48 |             // Try to solve each NR plan, and combine the ranked SPARQLs.
49 |             // We only keep the log of the BEST NR plan, for convenience.
50 |             for(int i=query.sList.size()-1; i>=0; i--)
51 |             {
52 |                 Sentence possibleSentence = query.sList.get(i);
53 |                 qlog.reloadSentence(possibleSentence);
54 |                 // qlog.isMaltParserUsed = true;
55 | 
56 |                 // LOG
57 |                 System.out.println("transQ: "+qlog.s.plainText);
58 |                 qlog.NRlog = query.preLog;
59 |                 qlog.SQGlog = "Id: "+query.queryId+"\nQuery: "+query.NLQuestion+"\n";
60 |                 qlog.SQGlog += qlog.NRlog;
61 |                 qlog.timeTable.put("step0", (int)NRtime);
62 | 
63 |                 // step 1: question parsing (dependency tree, sentence type)
64 |                 t = System.currentTimeMillis();
65 |                 QuestionParsing step1 = new QuestionParsing();
66 |                 step1.process(qlog);
67 |                 qlog.timeTable.put("step1", (int)(System.currentTimeMillis()-t));
68 | 
69 |                 // step 2: build query graph (structure construction, relation extraction, top-k join)
70 |                 t = System.currentTimeMillis();
71 |                 BuildQueryGraph step2 = new BuildQueryGraph();
72 |                 step2.process(qlog);
73 |                 qlog.timeTable.put("step2", (int)(System.currentTimeMillis()-t));
74 | 
75 |                 // step 3: some fixes (such as "one-node" or "ask-one-triple") and aggregation
76 |                 t = System.currentTimeMillis();
77 |                 AddtionalFix step3 = new AddtionalFix();
78 |                 step3.process(qlog);
79 | 
80 |                 // Collect SPARQLs.
81 |                 rankedSparqls.addAll(qlog.rankedSparqls);
82 |                 qlog.timeTable.put("step3", (int)(System.currentTimeMillis()-t));
83 |             }
84 | 
85 |             // Deduplicate in SPARQL.
86 |             for(Sparql spq: rankedSparqls)
87 |                 spq.deduplicate();
88 | 
89 |             // Sort (descending order).
90 |             Collections.sort(rankedSparqls);
91 |             qlog.rankedSparqls = rankedSparqls;
92 |             System.out.println("number of rankedSparqls = " + qlog.rankedSparqls.size());
93 | 
94 |             // Detect question focus.
95 |             for (int i=0; i<qlog.rankedSparqls.size(); i++)
202 |         List<String> inputList = FileUtil.readFile("E:/Linyinnian/qald6_special.txt");
203 |         for(String input: inputList)
204 |         {
205 |             ArrayList<String> outputs = new ArrayList<String>();
206 |             ArrayList<String> spqs = new ArrayList<String>();
207 |             spqs.add("id:"+String.valueOf(i));
208 |             i++;
209 | 
210 |             long parsing_st_time = System.currentTimeMillis();
211 | 
212 |             QueryLogger qlog = ga.getSparqlList(input);
213 |             if(qlog == null || qlog.rankedSparqls == null)
214 |                 continue;
215 | 
216 |             long parsing_ed_time = System.currentTimeMillis();
217 |             System.out.println("Question Understanding time: "+ (int)(parsing_ed_time - parsing_st_time)+ "ms");
218 |             System.out.println("TripleCheck time: "+ qlog.timeTable.get("TripleCheck") + "ms");
219 |             System.out.println("SparqlCheck time: "+ qlog.timeTable.get("SparqlCheck") + "ms");
220 |             System.out.println("Ranked Sparqls: " + qlog.rankedSparqls.size());
221 | 
222 |             outputs.add(qlog.SQGlog);
223 |             outputs.add(qlog.SQGlog + "Building HQG time: "+ (qlog.timeTable.get("step0")+qlog.timeTable.get("step1")+qlog.timeTable.get("step2")-qlog.timeTable.get("BQG_topkjoin")) + "ms");
224 |             outputs.add("TopKjoin time: "+ qlog.timeTable.get("BQG_topkjoin") + "ms");
225 |             outputs.add("Question Understanding time: "+ (int)(parsing_ed_time - parsing_st_time)+ "ms");
226 | 
227 |             long executing_st_time = System.currentTimeMillis();
228 |             Matches m = null;
229 |             System.out.println("[RESULT]");
230 |             ArrayList<String> lastSpqList = new ArrayList<String>();
231 |             int idx;
232 |             // Consider the top-5 SPARQLs.
233 |             for(idx=1; idx<=Math.min(qlog.rankedSparqls.size(), 5); idx++)
234 |             {
235 |                 Sparql curSpq = qlog.rankedSparqls.get(idx-1);
236 |                 String stdSPQwoPrefix = ga.getStdSparqlWoPrefix(qlog, curSpq);
237 |                 lastSpqList.add(stdSPQwoPrefix);
238 | 
239 |                 System.out.println("[" + idx + "]" + "score=" + curSpq.score);
240 |                 System.out.println(stdSPQwoPrefix);
241 | 
242 |                 // Print the top-3 SPARQLs to file.
243 |                 if(idx <= MAX_SPQ_NUM)
244 |                     // spqs.add("[" + idx + "]" + "score=" + curSpq.score + "\n" + stdSPQwoPrefix);
245 |                     outputs.add("[" + idx + "]" + "score=" + curSpq.score + "\n" + stdSPQwoPrefix);
246 | 
247 |                 // Execute by Virtuoso or gStore when answers are not found yet.
248 |                 if(m == null || m.answers == null)
249 |                 {
250 |                     if(curSpq.tripleList.size()>0 && curSpq.questionFocus!=null)
251 |                     {
252 |                         m = ga.getAnswerFromGStore2(curSpq);
253 |                     }
254 |                     if(m != null && m.answers != null)
255 |                     {
256 |                         // Found results using the current SPARQL, so we can record and print them.
257 |                         qlog.sparql = curSpq;
258 |                         qlog.match = m;
259 |                         qlog.reviseAnswers();
260 |                         System.out.println("Query Executing time: "+ (int)(System.currentTimeMillis() - executing_st_time)+ "ms");
261 |                     }
262 |                 }
263 |             }
264 | 
265 |             // Some TYPE constraints can be omitted; also try the untyped variant of the best SPARQL.
266 |             if(!qlog.rankedSparqls.isEmpty())
267 |             {
268 |                 Sparql untypedSparql = ga.getUntypedSparql(qlog.rankedSparqls.get(0));
269 |                 if(untypedSparql != null)
270 |                 {
271 |                     String stdSPQwoPrefix = ga.getStdSparqlWoPrefix(qlog, untypedSparql);
272 |                     if(!lastSpqList.contains(stdSPQwoPrefix))
273 |                         // spqs.add("[" + Math.min(MAX_SPQ_NUM+1, idx) + "]" + "score=" + 1000 + "\n" + stdSPQwoPrefix + "\n");
274 |                         outputs.add("[" + Math.min(MAX_SPQ_NUM+1, idx) + "]" + "score=" + 1000 + "\n" + stdSPQwoPrefix + "\n");
275 |                 }
276 |             }
277 | 
278 |             FileUtil.writeFile(outputs, "E:/Linyinnian/qald6_special_out.txt", true);
279 |         }
280 | 
281 |     }
282 | }
283 | 
--------------------------------------------------------------------------------
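Finally, an end-to-end driver sketch for the classes above (note that lines 96-201 of GAnswer.java — the question-focus loop body, the gStore/serialization helpers, and the opening of main() — are missing from this copy, so the sketch relies only on the members that are visible). It is not part of the repository; it assumes the dictionaries, Lucene indexes and NLP models expected by Globals.init() are configured, and it prints only scores because full SPARQL serialization goes through GAnswer's helper methods:

    import log.QueryLogger;
    import qa.GAnswer;

    public class Demo {
        public static void main(String[] args) {
            GAnswer.init();                      // loads dictionaries, indexes and parsers
            GAnswer ga = new GAnswer();

            QueryLogger qlog = ga.getSparqlList("Who is the wife of Barack Obama?");
            if (qlog == null || qlog.rankedSparqls == null)
                return;

            // rankedSparqls is already sorted in descending score order by getSparqlList().
            int n = Math.min(GAnswer.MAX_SPQ_NUM, qlog.rankedSparqls.size());
            for (int i = 0; i < n; i++)
                System.out.println("[" + (i + 1) + "] score=" + qlog.rankedSparqls.get(i).score);
        }
    }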