├── README.md
├── pom.xml
└── src
├── main
└── java
│ └── com
│ └── fengcone
│ └── phasmida
│ └── parsing
│ ├── ParsingConstant.java
│ ├── ParsingContext.java
│ ├── ParsingNode.java
│ ├── ParsingTree.java
│ └── response
│ ├── AnalyzerResponse.java
│ └── AnalyzerResponseUtil.java
└── test
└── java
└── com
└── fengcone
└── phasmida
└── parsing
├── ParsingAddressTest.java
└── rules.data
/README.md:
--------------------------------------------------------------------------------
1 | # phasmida-parsing
2 | chinese-address-parsing
3 | String address = "陕西省榆林市大柳塔";
4 | ParsingContext context = new ParsingContext();
5 | context.setParsingString(address);
6 | context.parsing();
7 | AnalyzerResponse analyzerResponse = AnalyzerResponseUtil.generateResponse(context);
8 | System.out.println(analyzerResponse);
9 | AnalyzerResponse(provinceName=陕西省, cityName=榆林市, countyName=神木市, townName=大柳塔镇, provinceId=610000, cityId=610800, countyId=610881, townId=610881104, address=陕西省榆林市大柳塔)
10 | 邮箱:fengcone@163.com
11 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | com.jd.presort
8 | phasmida-parsing
9 | 1.0-SNAPSHOT
10 |
11 |
12 |
13 | com.github.fengcone
14 | phasmida
15 | 1.1-RELEASE
16 |
17 |
18 | org.apache.logging.log4j
19 | log4j-api
20 | 2.7
21 |
22 |
23 | org.apache.logging.log4j
24 | log4j-core
25 | 2.7
26 |
27 |
28 | org.apache.logging.log4j
29 | log4j-slf4j-impl
30 | 2.7
31 |
32 |
33 | com.lmax
34 | disruptor
35 | 3.2.0
36 |
37 |
38 | org.slf4j
39 | slf4j-api
40 | 1.7.0
41 |
42 |
43 | org.projectlombok
44 | lombok
45 | 1.16.18
46 |
47 |
48 |
--------------------------------------------------------------------------------
/src/main/java/com/fengcone/phasmida/parsing/ParsingConstant.java:
--------------------------------------------------------------------------------
1 | package com.fengcone.phasmida.parsing;
2 |
/**
 * Constants shared by the parsing tree and its nodes.
 *
 * <p>This class only holds constants and is not meant to be instantiated.
 */
public final class ParsingConstant {
    /**
     * Father id that marks a top-level node. {@code ParsingTree.init} looks up the
     * root node list under this key, and {@code ParsingNode.process} compares
     * {@code fatherId} against it.
     */
    public static final long ROOT_NODE_ID = 0L;

    /**
     * Value {@code ParsingNode.process} stores in the processed map when a node
     * matches at index 0. A plain {@code 0} cannot be used there because failures
     * are recorded as {@code -nowIndex}, which is also {@code 0} at index 0.
     */
    public static final int SPECIAL_INDEX = -31415926;

    private ParsingConstant() {
        // constants holder, no instances
    }
}
7 |
--------------------------------------------------------------------------------
/src/main/java/com/fengcone/phasmida/parsing/ParsingContext.java:
--------------------------------------------------------------------------------
1 | package com.fengcone.phasmida.parsing;
2 |
3 | import com.fengcone.phasmida.core.PhasmidaContext;
4 | import lombok.Getter;
5 | import lombok.Setter;
6 |
7 | import java.util.*;
8 |
9 | @Getter
10 | @Setter
11 | public class ParsingContext {
12 | private PhasmidaContext phasmidaContext;
13 | private String parsingString;
14 | private List successNodeList = new ArrayList();
15 | private int nowIndex;
16 | private Map startIndexMap = new LinkedHashMap();
17 | private boolean isOpenCross = true;
18 | private Map processedMap = new LinkedHashMap();
19 | private Map endIndexMap = new LinkedHashMap();
20 | private Set successFatherNodeSet = new HashSet();
21 | private Set processedGrandson = new HashSet();
22 | private boolean isNeedCross;
23 |
24 | public void reset() {
25 | phasmidaContext = null;
26 | parsingString = null;
27 | successNodeList.clear();
28 | nowIndex = 0;
29 | startIndexMap.clear();
30 | isOpenCross = true;
31 | processedMap.clear();
32 | endIndexMap.clear();
33 | successFatherNodeSet.clear();
34 | processedGrandson.clear();
35 | isNeedCross = false;
36 | }
37 |
38 | private ParsingTree parsingTree;
39 |
40 | public void initPhasmidaContext() {
41 | if (phasmidaContext == null) {
42 | phasmidaContext = new PhasmidaContext(parsingString);
43 | } else {
44 | nowIndex = phasmidaContext.getEndIndex();
45 | phasmidaContext.setNextNeedBeHead(true);
46 | }
47 | }
48 |
49 | public void setNowIndex(int nowIndex) {
50 | this.nowIndex = nowIndex;
51 | if (phasmidaContext != null) {
52 | phasmidaContext.setStartIndex(0);
53 | phasmidaContext.setEndIndex(nowIndex);
54 | }
55 | }
56 |
57 | public boolean parsing() {
58 | return parsingTree.getRootParsingNode().process(this);
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/src/main/java/com/fengcone/phasmida/parsing/ParsingNode.java:
--------------------------------------------------------------------------------
1 | package com.fengcone.phasmida.parsing;
2 |
3 | import com.fengcone.phasmida.core.Phasmida;
4 | import lombok.Getter;
5 | import lombok.Setter;
6 |
7 | import java.util.List;
8 |
9 | @Getter
10 | @Setter
11 | public class ParsingNode {
12 | private boolean reIndex;
13 | private boolean reFatherIndex;
14 | private boolean dengCross;
15 | private Phasmida phasmida;
16 | private String description;
17 | private String targetId;
18 | private long id;
19 | private long fatherId;
20 | private int parsingLevel;
21 | private List childrenNode;
22 | private ParsingNode nextNode;
23 |
24 |
25 | public boolean process(ParsingContext context) {
26 | handleStartIndex(context);
27 | int nowIndex = context.getNowIndex();
28 | Integer processedResult = context.getProcessedMap().get(id);
29 | if (processedResult != null && Math.abs(processedResult) <= nowIndex) {
30 | return false;
31 | }
32 | boolean result = false;
33 | if (judgeCross(context)) {
34 | context.initPhasmidaContext();
35 | result = phasmida.process(context.getPhasmidaContext());
36 | context.getProcessedMap().put(id, result ? (nowIndex == 0 ? ParsingConstant.SPECIAL_INDEX : nowIndex) : -nowIndex);
37 | }
38 | if (result) {
39 | handleSuccessNode(context, nowIndex);
40 | if (isLastNode(this)) {
41 | return true;
42 | }
43 | if (reIndex) {
44 | context.setNowIndex(nowIndex);
45 | }
46 | if (childrenNode.get(0).process(context)) {
47 | return true;
48 | }
49 | }
50 | context.setNowIndex(nowIndex);
51 | if (nextNode == null) {
52 | if (context.isNeedCross() || fatherId == ParsingConstant.ROOT_NODE_ID) {
53 | return handleCrossProcess(context);
54 | }
55 | context.setNeedCross(true);
56 | return false;
57 | }
58 | return nextNode.process(context);
59 | }
60 |
61 | private boolean handleCrossProcess(ParsingContext context) {
62 | if (!context.isOpenCross()) {
63 | return false;
64 | }
65 | context.setOpenCross(false);
66 | if (context.getSuccessNodeList().size() == 0) {
67 | context.getProcessedGrandson().add(ParsingConstant.ROOT_NODE_ID);
68 | context.setNowIndex(0);
69 | boolean rootResult = handleGrandsons(context.getParsingTree().getRootParsingNodes(), context);
70 | if (rootResult) {
71 | return true;
72 | }
73 | }
74 | int successCount = context.getSuccessNodeList().size();
75 | if (successCount == 0) {
76 | return false;
77 | }
78 |
79 | for (int i = successCount - 1; i >= 0; i--) {
80 | ParsingNode parsingNode = context.getSuccessNodeList().get(i);
81 | if (context.getProcessedGrandson().contains(parsingNode.getId())) {
82 | continue;
83 | }
84 | context.getProcessedGrandson().add(parsingNode.getId());
85 | context.setNowIndex(context.getEndIndexMap().get(parsingNode.getId()));
86 | boolean grandsonsResult = handleGrandsons(parsingNode.getChildrenNode(), context);
87 | if (grandsonsResult) {
88 | return true;
89 | }
90 | if (successCount != context.getSuccessNodeList().size()) {
91 | context.setOpenCross(true);
92 | return handleCrossProcess(context);
93 | }
94 | }
95 | if (!context.getProcessedGrandson().contains(ParsingConstant.ROOT_NODE_ID)) {
96 | context.getProcessedGrandson().add(ParsingConstant.ROOT_NODE_ID);
97 | context.setNowIndex(0);
98 | boolean rootResult = handleGrandsons(context.getParsingTree().getRootParsingNodes(), context);
99 | if (rootResult) {
100 | return true;
101 | }
102 | if (successCount != context.getSuccessNodeList().size()) {
103 | context.setOpenCross(true);
104 | return handleCrossProcess(context);
105 | }
106 | }
107 | return false;
108 | }
109 |
110 | private boolean handleGrandsons(List list, ParsingContext context) {
111 | if (list == null || list.size() == 0) {
112 | return false;
113 | }
114 | for (ParsingNode parsingNode : list) {
115 | if (isLastNode(parsingNode)) {
116 | continue;
117 | }
118 | boolean grandsonResult = parsingNode.getChildrenNode().get(0).process(context);
119 | if (grandsonResult) {
120 | return true;
121 | }
122 | }
123 | return false;
124 | }
125 |
126 | private boolean isLastNode(ParsingNode node) {
127 | return node.getChildrenNode() == null || node.getChildrenNode().size() == 0;
128 | }
129 |
130 |
131 | private void handleSuccessNode(ParsingContext context, int nowIndex) {
132 | context.setNowIndex(context.getPhasmidaContext().getEndIndex());
133 | context.getSuccessNodeList().add(this);
134 | context.getStartIndexMap().put(id, nowIndex);
135 | context.getEndIndexMap().put(id, context.getNowIndex());
136 | ParsingNode parsingNode = context.getParsingTree().getNodesMap().get(fatherId);
137 | if (parsingNode != null) {
138 | context.getSuccessFatherNodeSet().add(parsingNode);
139 | }
140 | }
141 |
142 | private boolean judgeCross(ParsingContext context) {
143 | if (!dengCross) {
144 | return true;
145 | }
146 | if (context.getSuccessNodeList().size() == 0) {
147 | return false;
148 | }
149 | ParsingNode fatherNode = context.getSuccessNodeList().get(context.getSuccessNodeList().size() - 1);
150 | if (fatherId == fatherNode.getId()) {
151 | return true;
152 | }
153 | return false;
154 | }
155 |
156 |
157 | private void handleStartIndex(ParsingContext context) {
158 | if (!reFatherIndex) {
159 | return;
160 | }
161 | if (context.getSuccessNodeList().size() == 0) {
162 | return;
163 | }
164 | Long id = context.getSuccessNodeList().get(context.getSuccessNodeList().size() - 1).getId();
165 | Integer startIndex = context.getStartIndexMap().get(id);
166 | if (startIndex != null && startIndex >= 0 && !context.isOpenCross()) {
167 | context.setNowIndex(startIndex);
168 | }
169 | }
170 | }
171 |
--------------------------------------------------------------------------------
/src/main/java/com/fengcone/phasmida/parsing/ParsingTree.java:
--------------------------------------------------------------------------------
1 | package com.fengcone.phasmida.parsing;
2 |
3 | import com.fengcone.phasmida.core.PhasmidaFactory;
4 | import lombok.Getter;
5 | import lombok.Setter;
6 |
7 | import java.io.BufferedReader;
8 | import java.io.File;
9 | import java.io.FileReader;
10 | import java.io.IOException;
11 | import java.util.ArrayList;
12 | import java.util.HashMap;
13 | import java.util.List;
14 | import java.util.Map;
15 |
16 | @Getter
17 | @Setter
18 | public class ParsingTree {
19 | private Map nodesMap;
20 | private Map> childrenListMap;
21 | private ParsingNode rootParsingNode;
22 | private List rootParsingNodes;
23 |
24 |
25 | public void init(List parsingNodes) {
26 | nodesMap = new HashMap();
27 | childrenListMap = new HashMap>();
28 | for (ParsingNode parsingNode : parsingNodes) {
29 | nodesMap.put(parsingNode.getId(), parsingNode);
30 | long fatherId = parsingNode.getFatherId();
31 | List childrenList = childrenListMap.get(fatherId);
32 | if (childrenList == null) {
33 | childrenList = new ArrayList();
34 | childrenListMap.put(fatherId, childrenList);
35 | }
36 | childrenList.add(parsingNode);
37 | }
38 | rootParsingNodes = childrenListMap.get(ParsingConstant.ROOT_NODE_ID);
39 | rootParsingNode = rootParsingNodes.get(0);
40 | setChildrenNode(rootParsingNodes);
41 | }
42 |
43 | private void setChildrenNode(List rootNodes) {
44 | ParsingNode lastNode = null;
45 | for (ParsingNode rootNode : rootNodes) {
46 | if (lastNode != null) {
47 | lastNode.setNextNode(rootNode);
48 | }
49 | lastNode = rootNode;
50 | List parsingNodes = childrenListMap.get(rootNode.getId());
51 | if (parsingNodes == null || parsingNodes.size() == 0) {
52 | continue;
53 | }
54 | rootNode.setChildrenNode(parsingNodes);
55 | setChildrenNode(parsingNodes);
56 | }
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/src/main/java/com/fengcone/phasmida/parsing/response/AnalyzerResponse.java:
--------------------------------------------------------------------------------
1 | package com.fengcone.phasmida.parsing.response;
2 |
3 | import lombok.Getter;
4 | import lombok.Setter;
5 | import lombok.ToString;
6 |
/**
 * Result of analysing one address string.
 *
 * <p>Filled by {@code AnalyzerResponseUtil.generateResponse}: parsing levels
 * 1 to 4 map to province, city, county and town. Each name comes from the
 * matched node's description and each id from its target id. Levels that
 * were not resolved stay {@code null}.
 */
@Getter
@Setter
@ToString
public class AnalyzerResponse {
    // Field order is also the order in which @ToString prints the values.
    private String provinceName;
    private String cityName;
    private String countyName;
    private String townName;
    private Integer provinceId;
    private Integer cityId;
    private Integer countyId;
    private Integer townId;
    // The original input string that was parsed.
    private String address;
}
21 |
--------------------------------------------------------------------------------
/src/main/java/com/fengcone/phasmida/parsing/response/AnalyzerResponseUtil.java:
--------------------------------------------------------------------------------
1 | package com.fengcone.phasmida.parsing.response;
2 |
3 |
4 |
5 | import com.fengcone.phasmida.parsing.ParsingContext;
6 | import com.fengcone.phasmida.parsing.ParsingNode;
7 |
8 | import java.util.ArrayList;
9 | import java.util.LinkedHashSet;
10 | import java.util.List;
11 | import java.util.Set;
12 |
13 | public class AnalyzerResponseUtil {
14 | public static AnalyzerResponse generateResponse(ParsingContext context) {
15 | AnalyzerResponse response = new AnalyzerResponse();
16 | response.setAddress(context.getParsingString());
17 | List successNodes = context.getSuccessNodeList();
18 | Set successFatherNodeSet = context.getSuccessFatherNodeSet();
19 | int level = 4;
20 | if (successNodes.size() == 0) {
21 | return response;
22 | }
23 | ArrayList> rules = new ArrayList>();
24 | for (int i = 0; i < level; i++) {
25 | rules.add(new LinkedHashSet());
26 | }
27 | for (ParsingNode parsingNode : successNodes) {
28 | if (parsingNode.getParsingLevel() > level) {
29 | return response;
30 | }
31 |
32 | rules.get(parsingNode.getParsingLevel() - 1).add(parsingNode);
33 | }
34 | for (ParsingNode parsingNode : successFatherNodeSet) {
35 | if (parsingNode.getParsingLevel() > level) {
36 | return response;
37 | }
38 | rules.get(parsingNode.getParsingLevel() - 1).add(parsingNode);
39 | }
40 | for (int i = level - 1; i >= 0; i--) {
41 | ParsingNode compareRule = compareRule(rules.get(i), null);
42 | if (compareRule != null) {
43 | setResponse(rules, response, compareRule);
44 | return response;
45 | }
46 | }
47 | return response;
48 | }
49 |
50 | private static ParsingNode compareRule(Set ruleSet, ParsingNode child) {
51 | List rules = new ArrayList(ruleSet);
52 | if (rules.size() == 1) {
53 | return rules.get(0);
54 | }
55 | if (rules.size() == 0) {
56 | return null;
57 | }
58 | if (child != null) {
59 | for (ParsingNode presortRules : rules) {
60 | if (child.getFatherId() == presortRules.getId()) {
61 | return presortRules;
62 | }
63 | }
64 | }
65 | return rules.get(0);
66 | }
67 |
68 | private static void setResponse(ArrayList> rules, AnalyzerResponse response, ParsingNode last) {
69 | if (rules == null) {
70 | return;
71 | }
72 | switch (last.getParsingLevel()) {
73 | case 4:
74 | response.setTownId(Integer.valueOf(last.getTargetId()));
75 | response.setTownName(last.getDescription());
76 | setResponse(rules, response, compareRule(rules.get(2), last));
77 | break;
78 | case 3:
79 | response.setCountyId(Integer.valueOf(last.getTargetId()));
80 | response.setCountyName(last.getDescription());
81 | setResponse(rules, response, compareRule(rules.get(1), last));
82 | break;
83 | case 2:
84 | response.setCityId(Integer.valueOf(last.getTargetId()));
85 | response.setCityName(last.getDescription());
86 | setResponse(rules, response, compareRule(rules.get(0), last));
87 | break;
88 | case 1:
89 | response.setProvinceId(Integer.valueOf(last.getTargetId()));
90 | response.setProvinceName(last.getDescription());
91 | break;
92 | default:
93 | setResponse(rules, response, compareRule(rules.get(last.getParsingLevel() - 1), last));
94 | break;
95 | }
96 | }
97 | }
98 |
--------------------------------------------------------------------------------
/src/test/java/com/fengcone/phasmida/parsing/ParsingAddressTest.java:
--------------------------------------------------------------------------------
1 | package com.fengcone.phasmida.parsing;
2 |
3 | import com.fengcone.phasmida.core.PhasmidaFactory;
4 | import com.fengcone.phasmida.parsing.response.AnalyzerResponse;
5 | import com.fengcone.phasmida.parsing.response.AnalyzerResponseUtil;
6 | import com.fengcone.phasmida.registry.RegistryUtil;
7 |
8 | import java.io.BufferedReader;
9 | import java.io.File;
10 | import java.io.FileReader;
11 | import java.io.IOException;
12 | import java.util.ArrayList;
13 | import java.util.List;
14 |
15 | public class ParsingAddressTest {
16 | public static void main(String[] args) throws IOException {
17 | String address = "陕西省榆林市大柳塔";
18 | ParsingContext context = new ParsingContext();
19 | context.setParsingString(address);
20 | String dataPath = "D:\\WorkSpace\\phasmida-parsing\\src\\test\\java\\com\\fengcone\\phasmida\\parsing\\rules.data";
21 | ParsingTree tree = init(new File(dataPath));
22 | context.setParsingTree(tree);
23 | context.parsing();
24 | AnalyzerResponse analyzerResponse = AnalyzerResponseUtil.generateResponse(context);
25 | System.out.println(analyzerResponse);
26 | address = "内蒙古康巴什区青春山";
27 | context.reset();
28 | context.setParsingString(address);
29 | context.parsing();
30 | analyzerResponse = AnalyzerResponseUtil.generateResponse(context);
31 | System.out.println(analyzerResponse);
32 | }
33 |
34 |
35 | private static ParsingTree init(File file) throws IOException {
36 | BufferedReader reader = new BufferedReader(new FileReader(file));
37 | List parsingNodes = new ArrayList();
38 | PhasmidaFactory factory = new PhasmidaFactory();
39 | RegistryUtil.registerStandardFragments();
40 | String line;
41 | while ((line = reader.readLine()) != null) {
42 | ParsingNode node = new ParsingNode();
43 | String[] split = line.split(";");
44 | node.setId(Long.valueOf(split[0]));
45 | node.setParsingLevel(Integer.valueOf(split[1]));
46 | String regex = split[2];
47 | if (!regex.contains("end")) {
48 | node.setPhasmida(factory.getPhasmida(regex));
49 | } else {
50 | String[] split1 = regex.split(".end\\(");
51 | node.setPhasmida(factory.getPhasmida(split1[0]));
52 | for (int i = 1; i < split1.length; i++) {
53 | String sign = split1[i];
54 | if (sign.equals("dc)")) {
55 | node.setDengCross(true);
56 | } else if (sign.equals("rf)")) {
57 | node.setReFatherIndex(true);
58 | } else if (sign.equals("r)")) {
59 | node.setReIndex(true);
60 | }
61 | }
62 | }
63 | node.setTargetId(split[3]);
64 | node.setFatherId(Long.valueOf(split[4]));
65 | node.setDescription(split[5]);
66 | parsingNodes.add(node);
67 | }
68 | ParsingTree tree = new ParsingTree();
69 | tree.init(parsingNodes);
70 | return tree;
71 | }
72 | }
73 |
--------------------------------------------------------------------------------