├── README.md ├── pom.xml └── src ├── main └── java │ └── com │ └── fengcone │ └── phasmida │ └── parsing │ ├── ParsingConstant.java │ ├── ParsingContext.java │ ├── ParsingNode.java │ ├── ParsingTree.java │ └── response │ ├── AnalyzerResponse.java │ └── AnalyzerResponseUtil.java └── test └── java └── com └── fengcone └── phasmida └── parsing ├── ParsingAddressTest.java └── rules.data /README.md: -------------------------------------------------------------------------------- 1 | # phasmida-parsing 2 | chinese-address-parsing 3 | String address = "陕西省榆林市大柳塔"; 4 | ParsingContext context = new ParsingContext(); 5 | context.setParsingString(address); 6 | context.parsing(); 7 | AnalyzerResponse analyzerResponse = AnalyzerResponseUtil.generateResponse(context); 8 | System.out.println(analyzerResponse); 9 | AnalyzerResponse(provinceName=陕西省, cityName=榆林市, countyName=神木市, townName=大柳塔镇, provinceId=610000, cityId=610800, countyId=610881, townId=610881104, address=陕西省榆林市大柳塔) 10 | 邮箱:fengcone@163.com 11 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.jd.presort 8 | phasmida-parsing 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | com.github.fengcone 14 | phasmida 15 | 1.1-RELEASE 16 | 17 | 18 | org.apache.logging.log4j 19 | log4j-api 20 | 2.7 21 | 22 | 23 | org.apache.logging.log4j 24 | log4j-core 25 | 2.7 26 | 27 | 28 | org.apache.logging.log4j 29 | log4j-slf4j-impl 30 | 2.7 31 | 32 | 33 | com.lmax 34 | disruptor 35 | 3.2.0 36 | 37 | 38 | org.slf4j 39 | slf4j-api 40 | 1.7.0 41 | 42 | 43 | org.projectlombok 44 | lombok 45 | 1.16.18 46 | 47 | 48 | -------------------------------------------------------------------------------- /src/main/java/com/fengcone/phasmida/parsing/ParsingConstant.java: -------------------------------------------------------------------------------- 1 | package com.fengcone.phasmida.parsing; 2 | 3 | public class ParsingConstant { 4 | public static final long ROOT_NODE_ID = 0L; 5 | public static final int SPECIAL_INDEX = -31415926; 6 | } 7 | -------------------------------------------------------------------------------- /src/main/java/com/fengcone/phasmida/parsing/ParsingContext.java: -------------------------------------------------------------------------------- 1 | package com.fengcone.phasmida.parsing; 2 | 3 | import com.fengcone.phasmida.core.PhasmidaContext; 4 | import lombok.Getter; 5 | import lombok.Setter; 6 | 7 | import java.util.*; 8 | 9 | @Getter 10 | @Setter 11 | public class ParsingContext { 12 | private PhasmidaContext phasmidaContext; 13 | private String parsingString; 14 | private List successNodeList = new ArrayList(); 15 | private int nowIndex; 16 | private Map startIndexMap = new LinkedHashMap(); 17 | private boolean isOpenCross = true; 18 | private Map processedMap = new LinkedHashMap(); 19 | private Map endIndexMap = new LinkedHashMap(); 20 | private Set successFatherNodeSet = new HashSet(); 21 | private Set processedGrandson = new HashSet(); 22 | private boolean isNeedCross; 23 | 24 | public void reset() { 25 | phasmidaContext = null; 26 | parsingString = null; 27 | successNodeList.clear(); 28 | nowIndex = 0; 29 | startIndexMap.clear(); 30 | isOpenCross = true; 31 | processedMap.clear(); 32 | endIndexMap.clear(); 33 | successFatherNodeSet.clear(); 34 | processedGrandson.clear(); 35 | isNeedCross = false; 36 | } 37 | 38 | private ParsingTree parsingTree; 39 | 40 | public void initPhasmidaContext() { 41 | if (phasmidaContext == null) { 42 | phasmidaContext = new PhasmidaContext(parsingString); 43 | } else { 44 | nowIndex = phasmidaContext.getEndIndex(); 45 | phasmidaContext.setNextNeedBeHead(true); 46 | } 47 | } 48 | 49 | public void setNowIndex(int nowIndex) { 50 | this.nowIndex = nowIndex; 51 | if (phasmidaContext != null) { 52 | phasmidaContext.setStartIndex(0); 53 | phasmidaContext.setEndIndex(nowIndex); 54 | } 55 | } 56 | 57 | public boolean parsing() { 58 | return parsingTree.getRootParsingNode().process(this); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/com/fengcone/phasmida/parsing/ParsingNode.java: -------------------------------------------------------------------------------- 1 | package com.fengcone.phasmida.parsing; 2 | 3 | import com.fengcone.phasmida.core.Phasmida; 4 | import lombok.Getter; 5 | import lombok.Setter; 6 | 7 | import java.util.List; 8 | 9 | @Getter 10 | @Setter 11 | public class ParsingNode { 12 | private boolean reIndex; 13 | private boolean reFatherIndex; 14 | private boolean dengCross; 15 | private Phasmida phasmida; 16 | private String description; 17 | private String targetId; 18 | private long id; 19 | private long fatherId; 20 | private int parsingLevel; 21 | private List childrenNode; 22 | private ParsingNode nextNode; 23 | 24 | 25 | public boolean process(ParsingContext context) { 26 | handleStartIndex(context); 27 | int nowIndex = context.getNowIndex(); 28 | Integer processedResult = context.getProcessedMap().get(id); 29 | if (processedResult != null && Math.abs(processedResult) <= nowIndex) { 30 | return false; 31 | } 32 | boolean result = false; 33 | if (judgeCross(context)) { 34 | context.initPhasmidaContext(); 35 | result = phasmida.process(context.getPhasmidaContext()); 36 | context.getProcessedMap().put(id, result ? (nowIndex == 0 ? ParsingConstant.SPECIAL_INDEX : nowIndex) : -nowIndex); 37 | } 38 | if (result) { 39 | handleSuccessNode(context, nowIndex); 40 | if (isLastNode(this)) { 41 | return true; 42 | } 43 | if (reIndex) { 44 | context.setNowIndex(nowIndex); 45 | } 46 | if (childrenNode.get(0).process(context)) { 47 | return true; 48 | } 49 | } 50 | context.setNowIndex(nowIndex); 51 | if (nextNode == null) { 52 | if (context.isNeedCross() || fatherId == ParsingConstant.ROOT_NODE_ID) { 53 | return handleCrossProcess(context); 54 | } 55 | context.setNeedCross(true); 56 | return false; 57 | } 58 | return nextNode.process(context); 59 | } 60 | 61 | private boolean handleCrossProcess(ParsingContext context) { 62 | if (!context.isOpenCross()) { 63 | return false; 64 | } 65 | context.setOpenCross(false); 66 | if (context.getSuccessNodeList().size() == 0) { 67 | context.getProcessedGrandson().add(ParsingConstant.ROOT_NODE_ID); 68 | context.setNowIndex(0); 69 | boolean rootResult = handleGrandsons(context.getParsingTree().getRootParsingNodes(), context); 70 | if (rootResult) { 71 | return true; 72 | } 73 | } 74 | int successCount = context.getSuccessNodeList().size(); 75 | if (successCount == 0) { 76 | return false; 77 | } 78 | 79 | for (int i = successCount - 1; i >= 0; i--) { 80 | ParsingNode parsingNode = context.getSuccessNodeList().get(i); 81 | if (context.getProcessedGrandson().contains(parsingNode.getId())) { 82 | continue; 83 | } 84 | context.getProcessedGrandson().add(parsingNode.getId()); 85 | context.setNowIndex(context.getEndIndexMap().get(parsingNode.getId())); 86 | boolean grandsonsResult = handleGrandsons(parsingNode.getChildrenNode(), context); 87 | if (grandsonsResult) { 88 | return true; 89 | } 90 | if (successCount != context.getSuccessNodeList().size()) { 91 | context.setOpenCross(true); 92 | return handleCrossProcess(context); 93 | } 94 | } 95 | if (!context.getProcessedGrandson().contains(ParsingConstant.ROOT_NODE_ID)) { 96 | context.getProcessedGrandson().add(ParsingConstant.ROOT_NODE_ID); 97 | context.setNowIndex(0); 98 | boolean rootResult = handleGrandsons(context.getParsingTree().getRootParsingNodes(), context); 99 | if (rootResult) { 100 | return true; 101 | } 102 | if (successCount != context.getSuccessNodeList().size()) { 103 | context.setOpenCross(true); 104 | return handleCrossProcess(context); 105 | } 106 | } 107 | return false; 108 | } 109 | 110 | private boolean handleGrandsons(List list, ParsingContext context) { 111 | if (list == null || list.size() == 0) { 112 | return false; 113 | } 114 | for (ParsingNode parsingNode : list) { 115 | if (isLastNode(parsingNode)) { 116 | continue; 117 | } 118 | boolean grandsonResult = parsingNode.getChildrenNode().get(0).process(context); 119 | if (grandsonResult) { 120 | return true; 121 | } 122 | } 123 | return false; 124 | } 125 | 126 | private boolean isLastNode(ParsingNode node) { 127 | return node.getChildrenNode() == null || node.getChildrenNode().size() == 0; 128 | } 129 | 130 | 131 | private void handleSuccessNode(ParsingContext context, int nowIndex) { 132 | context.setNowIndex(context.getPhasmidaContext().getEndIndex()); 133 | context.getSuccessNodeList().add(this); 134 | context.getStartIndexMap().put(id, nowIndex); 135 | context.getEndIndexMap().put(id, context.getNowIndex()); 136 | ParsingNode parsingNode = context.getParsingTree().getNodesMap().get(fatherId); 137 | if (parsingNode != null) { 138 | context.getSuccessFatherNodeSet().add(parsingNode); 139 | } 140 | } 141 | 142 | private boolean judgeCross(ParsingContext context) { 143 | if (!dengCross) { 144 | return true; 145 | } 146 | if (context.getSuccessNodeList().size() == 0) { 147 | return false; 148 | } 149 | ParsingNode fatherNode = context.getSuccessNodeList().get(context.getSuccessNodeList().size() - 1); 150 | if (fatherId == fatherNode.getId()) { 151 | return true; 152 | } 153 | return false; 154 | } 155 | 156 | 157 | private void handleStartIndex(ParsingContext context) { 158 | if (!reFatherIndex) { 159 | return; 160 | } 161 | if (context.getSuccessNodeList().size() == 0) { 162 | return; 163 | } 164 | Long id = context.getSuccessNodeList().get(context.getSuccessNodeList().size() - 1).getId(); 165 | Integer startIndex = context.getStartIndexMap().get(id); 166 | if (startIndex != null && startIndex >= 0 && !context.isOpenCross()) { 167 | context.setNowIndex(startIndex); 168 | } 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /src/main/java/com/fengcone/phasmida/parsing/ParsingTree.java: -------------------------------------------------------------------------------- 1 | package com.fengcone.phasmida.parsing; 2 | 3 | import com.fengcone.phasmida.core.PhasmidaFactory; 4 | import lombok.Getter; 5 | import lombok.Setter; 6 | 7 | import java.io.BufferedReader; 8 | import java.io.File; 9 | import java.io.FileReader; 10 | import java.io.IOException; 11 | import java.util.ArrayList; 12 | import java.util.HashMap; 13 | import java.util.List; 14 | import java.util.Map; 15 | 16 | @Getter 17 | @Setter 18 | public class ParsingTree { 19 | private Map nodesMap; 20 | private Map> childrenListMap; 21 | private ParsingNode rootParsingNode; 22 | private List rootParsingNodes; 23 | 24 | 25 | public void init(List parsingNodes) { 26 | nodesMap = new HashMap(); 27 | childrenListMap = new HashMap>(); 28 | for (ParsingNode parsingNode : parsingNodes) { 29 | nodesMap.put(parsingNode.getId(), parsingNode); 30 | long fatherId = parsingNode.getFatherId(); 31 | List childrenList = childrenListMap.get(fatherId); 32 | if (childrenList == null) { 33 | childrenList = new ArrayList(); 34 | childrenListMap.put(fatherId, childrenList); 35 | } 36 | childrenList.add(parsingNode); 37 | } 38 | rootParsingNodes = childrenListMap.get(ParsingConstant.ROOT_NODE_ID); 39 | rootParsingNode = rootParsingNodes.get(0); 40 | setChildrenNode(rootParsingNodes); 41 | } 42 | 43 | private void setChildrenNode(List rootNodes) { 44 | ParsingNode lastNode = null; 45 | for (ParsingNode rootNode : rootNodes) { 46 | if (lastNode != null) { 47 | lastNode.setNextNode(rootNode); 48 | } 49 | lastNode = rootNode; 50 | List parsingNodes = childrenListMap.get(rootNode.getId()); 51 | if (parsingNodes == null || parsingNodes.size() == 0) { 52 | continue; 53 | } 54 | rootNode.setChildrenNode(parsingNodes); 55 | setChildrenNode(parsingNodes); 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/com/fengcone/phasmida/parsing/response/AnalyzerResponse.java: -------------------------------------------------------------------------------- 1 | package com.fengcone.phasmida.parsing.response; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | import lombok.ToString; 6 | 7 | @Getter 8 | @Setter 9 | @ToString 10 | public class AnalyzerResponse { 11 | private String provinceName; 12 | private String cityName; 13 | private String countyName; 14 | private String townName; 15 | private Integer provinceId; 16 | private Integer cityId; 17 | private Integer countyId; 18 | private Integer townId; 19 | private String address; 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/fengcone/phasmida/parsing/response/AnalyzerResponseUtil.java: -------------------------------------------------------------------------------- 1 | package com.fengcone.phasmida.parsing.response; 2 | 3 | 4 | 5 | import com.fengcone.phasmida.parsing.ParsingContext; 6 | import com.fengcone.phasmida.parsing.ParsingNode; 7 | 8 | import java.util.ArrayList; 9 | import java.util.LinkedHashSet; 10 | import java.util.List; 11 | import java.util.Set; 12 | 13 | public class AnalyzerResponseUtil { 14 | public static AnalyzerResponse generateResponse(ParsingContext context) { 15 | AnalyzerResponse response = new AnalyzerResponse(); 16 | response.setAddress(context.getParsingString()); 17 | List successNodes = context.getSuccessNodeList(); 18 | Set successFatherNodeSet = context.getSuccessFatherNodeSet(); 19 | int level = 4; 20 | if (successNodes.size() == 0) { 21 | return response; 22 | } 23 | ArrayList> rules = new ArrayList>(); 24 | for (int i = 0; i < level; i++) { 25 | rules.add(new LinkedHashSet()); 26 | } 27 | for (ParsingNode parsingNode : successNodes) { 28 | if (parsingNode.getParsingLevel() > level) { 29 | return response; 30 | } 31 | 32 | rules.get(parsingNode.getParsingLevel() - 1).add(parsingNode); 33 | } 34 | for (ParsingNode parsingNode : successFatherNodeSet) { 35 | if (parsingNode.getParsingLevel() > level) { 36 | return response; 37 | } 38 | rules.get(parsingNode.getParsingLevel() - 1).add(parsingNode); 39 | } 40 | for (int i = level - 1; i >= 0; i--) { 41 | ParsingNode compareRule = compareRule(rules.get(i), null); 42 | if (compareRule != null) { 43 | setResponse(rules, response, compareRule); 44 | return response; 45 | } 46 | } 47 | return response; 48 | } 49 | 50 | private static ParsingNode compareRule(Set ruleSet, ParsingNode child) { 51 | List rules = new ArrayList(ruleSet); 52 | if (rules.size() == 1) { 53 | return rules.get(0); 54 | } 55 | if (rules.size() == 0) { 56 | return null; 57 | } 58 | if (child != null) { 59 | for (ParsingNode presortRules : rules) { 60 | if (child.getFatherId() == presortRules.getId()) { 61 | return presortRules; 62 | } 63 | } 64 | } 65 | return rules.get(0); 66 | } 67 | 68 | private static void setResponse(ArrayList> rules, AnalyzerResponse response, ParsingNode last) { 69 | if (rules == null) { 70 | return; 71 | } 72 | switch (last.getParsingLevel()) { 73 | case 4: 74 | response.setTownId(Integer.valueOf(last.getTargetId())); 75 | response.setTownName(last.getDescription()); 76 | setResponse(rules, response, compareRule(rules.get(2), last)); 77 | break; 78 | case 3: 79 | response.setCountyId(Integer.valueOf(last.getTargetId())); 80 | response.setCountyName(last.getDescription()); 81 | setResponse(rules, response, compareRule(rules.get(1), last)); 82 | break; 83 | case 2: 84 | response.setCityId(Integer.valueOf(last.getTargetId())); 85 | response.setCityName(last.getDescription()); 86 | setResponse(rules, response, compareRule(rules.get(0), last)); 87 | break; 88 | case 1: 89 | response.setProvinceId(Integer.valueOf(last.getTargetId())); 90 | response.setProvinceName(last.getDescription()); 91 | break; 92 | default: 93 | setResponse(rules, response, compareRule(rules.get(last.getParsingLevel() - 1), last)); 94 | break; 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/test/java/com/fengcone/phasmida/parsing/ParsingAddressTest.java: -------------------------------------------------------------------------------- 1 | package com.fengcone.phasmida.parsing; 2 | 3 | import com.fengcone.phasmida.core.PhasmidaFactory; 4 | import com.fengcone.phasmida.parsing.response.AnalyzerResponse; 5 | import com.fengcone.phasmida.parsing.response.AnalyzerResponseUtil; 6 | import com.fengcone.phasmida.registry.RegistryUtil; 7 | 8 | import java.io.BufferedReader; 9 | import java.io.File; 10 | import java.io.FileReader; 11 | import java.io.IOException; 12 | import java.util.ArrayList; 13 | import java.util.List; 14 | 15 | public class ParsingAddressTest { 16 | public static void main(String[] args) throws IOException { 17 | String address = "陕西省榆林市大柳塔"; 18 | ParsingContext context = new ParsingContext(); 19 | context.setParsingString(address); 20 | String dataPath = "D:\\WorkSpace\\phasmida-parsing\\src\\test\\java\\com\\fengcone\\phasmida\\parsing\\rules.data"; 21 | ParsingTree tree = init(new File(dataPath)); 22 | context.setParsingTree(tree); 23 | context.parsing(); 24 | AnalyzerResponse analyzerResponse = AnalyzerResponseUtil.generateResponse(context); 25 | System.out.println(analyzerResponse); 26 | address = "内蒙古康巴什区青春山"; 27 | context.reset(); 28 | context.setParsingString(address); 29 | context.parsing(); 30 | analyzerResponse = AnalyzerResponseUtil.generateResponse(context); 31 | System.out.println(analyzerResponse); 32 | } 33 | 34 | 35 | private static ParsingTree init(File file) throws IOException { 36 | BufferedReader reader = new BufferedReader(new FileReader(file)); 37 | List parsingNodes = new ArrayList(); 38 | PhasmidaFactory factory = new PhasmidaFactory(); 39 | RegistryUtil.registerStandardFragments(); 40 | String line; 41 | while ((line = reader.readLine()) != null) { 42 | ParsingNode node = new ParsingNode(); 43 | String[] split = line.split(";"); 44 | node.setId(Long.valueOf(split[0])); 45 | node.setParsingLevel(Integer.valueOf(split[1])); 46 | String regex = split[2]; 47 | if (!regex.contains("end")) { 48 | node.setPhasmida(factory.getPhasmida(regex)); 49 | } else { 50 | String[] split1 = regex.split(".end\\("); 51 | node.setPhasmida(factory.getPhasmida(split1[0])); 52 | for (int i = 1; i < split1.length; i++) { 53 | String sign = split1[i]; 54 | if (sign.equals("dc)")) { 55 | node.setDengCross(true); 56 | } else if (sign.equals("rf)")) { 57 | node.setReFatherIndex(true); 58 | } else if (sign.equals("r)")) { 59 | node.setReIndex(true); 60 | } 61 | } 62 | } 63 | node.setTargetId(split[3]); 64 | node.setFatherId(Long.valueOf(split[4])); 65 | node.setDescription(split[5]); 66 | parsingNodes.add(node); 67 | } 68 | ParsingTree tree = new ParsingTree(); 69 | tree.init(parsingNodes); 70 | return tree; 71 | } 72 | } 73 | --------------------------------------------------------------------------------