├── .gitignore
├── LICENSE
├── README.md
├── build.sh
├── lib
└── install.sh
├── models
└── dl.sh
├── nifi-corenlp-nar
├── .classpath
├── .gitignore
├── .settings
│ ├── org.eclipse.core.resources.prefs
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
└── pom.xml
├── nifi-corenlp-processors
├── .classpath
├── .gitignore
├── .settings
│ ├── org.eclipse.core.resources.prefs
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── dataflowdeveloper
│ │ │ └── processors
│ │ │ └── process
│ │ │ ├── CoreNLPProcessor.java
│ │ │ ├── Location.java
│ │ │ ├── PersonName.java
│ │ │ └── SentimentService.java
│ └── resources
│ │ └── META-INF
│ │ └── services
│ │ └── org.apache.nifi.processor.Processor
│ └── test
│ ├── java
│ └── com
│ │ └── dataflowdeveloper
│ │ └── processors
│ │ └── process
│ │ └── CoreNLPProcessorTest.java
│ └── resources
│ └── test.csv
├── pom.xml
├── pushtogithub.sh
└── upload.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | /target/*.*
3 | /.settings/
4 | .DS_Store
5 | .springBeans
6 | .project
7 | /src/main/resources/application.yml
8 | /src/main/resources/application.properties
9 | application.*
10 | # Mobile Tools for Java (J2ME)
11 | .mtj.tmp/
12 | deploy.sh
13 |
14 | # Package Files #
15 | *.jar
16 | *.war
17 | *.ear
18 | *.nar
19 | *.zip
20 | *.tar
21 | *.gz
22 | *.cz
23 | *.class
24 |
25 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
26 | hs_err_pid*
27 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # nifi-corenlp-processor
2 | Apache NiFi Custom Processor for working with Stanford CoreNLP for Sentiment Analysis in Java 8
3 |
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | mvn install -DskipTests
2 |
--------------------------------------------------------------------------------
/lib/install.sh:
--------------------------------------------------------------------------------
1 | mvn install:install-file -Dfile=vader.jar -DgroupId=com.vader -DartifactId=vader -Dversion=1.0 -Dpackaging=jar -DgeneratePom=true
2 |
--------------------------------------------------------------------------------
/models/dl.sh:
--------------------------------------------------------------------------------
1 | wget http://nlp.stanford.edu/software/stanford-english-corenlp-2018-02-27-models.jar
2 |
--------------------------------------------------------------------------------
/nifi-corenlp-nar/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/nifi-corenlp-nar/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/nifi-corenlp-nar/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding/=UTF-8
3 |
--------------------------------------------------------------------------------
/nifi-corenlp-nar/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
3 | org.eclipse.jdt.core.compiler.compliance=1.8
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.8
6 |
--------------------------------------------------------------------------------
/nifi-corenlp-nar/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/nifi-corenlp-nar/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
16 |
17 | 4.0.0
18 |
19 |
20 | com.dataflowdeveloper
21 | corenlp-processor
22 | 1.6
23 |
24 |
25 | nifi-corenlp-nar
26 | nar
27 |
28 | true
29 | true
30 |
31 |
32 |
33 |
34 | com.dataflowdeveloper
35 | nifi-corenlp-processors
36 | 1.6
37 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/nifi-corenlp-processors/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/nifi-corenlp-processors/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/nifi-corenlp-processors/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/main/java=UTF-8
3 | encoding//src/main/resources=UTF-8
4 | encoding//src/test/java=UTF-8
5 | encoding//src/test/resources=UTF-8
6 | encoding/=UTF-8
7 |
--------------------------------------------------------------------------------
/nifi-corenlp-processors/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
3 | org.eclipse.jdt.core.compiler.compliance=1.8
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.source=1.8
6 |
--------------------------------------------------------------------------------
/nifi-corenlp-processors/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/nifi-corenlp-processors/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
16 |
17 | 4.0.0
18 |
19 |
20 | com.dataflowdeveloper
21 | corenlp-processor
22 | 1.6
23 |
24 |
25 | nifi-corenlp-processors
26 | jar
27 |
28 |
29 |
30 | org.apache.nifi
31 | nifi-api
32 |
33 |
34 | org.apache.nifi
35 | nifi-processor-utils
36 | 1.6.0
37 |
38 |
39 | org.apache.nifi
40 | nifi-mock
41 | test
42 | 1.6.0
43 |
44 |
45 | org.slf4j
46 | slf4j-simple
47 | test
48 |
49 |
50 | junit
51 | junit
52 | test
53 |
54 |
55 |
56 |
57 | edu.stanford.nlp
58 | stanford-corenlp
59 | 3.9.1
60 |
61 |
62 | edu.stanford.nlp
63 | stanford-corenlp
64 | 3.9.1
65 | models
66 |
67 |
68 | com.google.code.gson
69 | gson
70 | 2.8.2
71 |
72 |
73 |
74 |
75 | jitpack.io
76 | https://jitpack.io
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/nifi-corenlp-processors/src/main/java/com/dataflowdeveloper/processors/process/CoreNLPProcessor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package com.dataflowdeveloper.processors.process;
18 |
19 | import java.util.ArrayList;
20 | import java.util.Collections;
21 | import java.util.HashSet;
22 | import java.util.List;
23 | import java.util.Set;
24 |
25 | import org.apache.nifi.annotation.behavior.ReadsAttribute;
26 | import org.apache.nifi.annotation.behavior.ReadsAttributes;
27 | import org.apache.nifi.annotation.behavior.WritesAttribute;
28 | import org.apache.nifi.annotation.behavior.WritesAttributes;
29 | import org.apache.nifi.annotation.documentation.CapabilityDescription;
30 | import org.apache.nifi.annotation.documentation.SeeAlso;
31 | import org.apache.nifi.annotation.documentation.Tags;
32 | import org.apache.nifi.annotation.lifecycle.OnScheduled;
33 | import org.apache.nifi.components.PropertyDescriptor;
34 | import org.apache.nifi.flowfile.FlowFile;
35 | import org.apache.nifi.processor.AbstractProcessor;
36 | import org.apache.nifi.processor.ProcessContext;
37 | import org.apache.nifi.processor.ProcessSession;
38 | import org.apache.nifi.processor.ProcessorInitializationContext;
39 | import org.apache.nifi.processor.Relationship;
40 | import org.apache.nifi.processor.exception.ProcessException;
41 | import org.apache.nifi.processor.util.StandardValidators;
42 |
43 | @Tags({ "corenlpprocessor" })
44 | @CapabilityDescription("Run Stanford CoreNLP Sentiment Analysis")
45 | @SeeAlso({})
46 | @ReadsAttributes({ @ReadsAttribute(attribute = "sentence", description = "sentence to analyze") })
47 | @WritesAttributes({
48 | @WritesAttribute(attribute = "sentiment", description = "Stanford CoreNLP sentiment analysis of that sentence.") })
49 | public class CoreNLPProcessor extends AbstractProcessor {
50 |
51 | public static final String ATTRIBUTE_OUTPUT_NAME = "sentiment";
52 | public static final String ATTRIBUTE_INPUT_NAME = "sentence";
53 | public static final String PROPERTY_NAME_EXTRA = "Extra Resources";
54 |
55 | public static final PropertyDescriptor MY_PROPERTY = new PropertyDescriptor.Builder().name(ATTRIBUTE_INPUT_NAME)
56 | .description("A sentence to parse, such as a Tweet.").required(true).expressionLanguageSupported(true)
57 | .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();
58 |
59 | public static final Relationship REL_SUCCESS = new Relationship.Builder().name("success")
60 | .description("Successfully determine sentiment.").build();
61 |
62 | public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure")
63 | .description("Failed to determine sentiment.").build();
64 |
65 | private List descriptors;
66 |
67 | private Set relationships;
68 |
69 | private SentimentService service;
70 |
71 | @Override
72 | protected void init(final ProcessorInitializationContext context) {
73 | final List descriptors = new ArrayList();
74 | descriptors.add(MY_PROPERTY);
75 | this.descriptors = Collections.unmodifiableList(descriptors);
76 |
77 | final Set relationships = new HashSet();
78 | relationships.add(REL_SUCCESS);
79 | relationships.add(REL_FAILURE);
80 | this.relationships = Collections.unmodifiableSet(relationships);
81 | }
82 |
83 | @Override
84 | public Set getRelationships() {
85 | return this.relationships;
86 | }
87 |
88 | @Override
89 | public final List getSupportedPropertyDescriptors() {
90 | return descriptors;
91 | }
92 |
93 | /**
94 | * initialize sentiment service
95 | */
96 | private void initService() {
97 | service = new SentimentService();
98 | }
99 |
100 | @OnScheduled
101 | public void onScheduled(final ProcessContext context) {
102 | initService();
103 | return;
104 | }
105 |
106 | @Override
107 | public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
108 | FlowFile flowFile = session.get();
109 | if (flowFile == null) {
110 | flowFile = session.create();
111 | }
112 | if (service == null) {
113 | initService();
114 | }
115 | try {
116 | flowFile.getAttributes();
117 |
118 | String sentence = flowFile.getAttribute(ATTRIBUTE_INPUT_NAME);
119 | String sentence2 = context.getProperty(ATTRIBUTE_INPUT_NAME).evaluateAttributeExpressions(flowFile)
120 | .getValue();
121 |
122 | if (sentence == null) {
123 | sentence = sentence2;
124 | }
125 | if (sentence == null) {
126 | return;
127 | }
128 |
129 | String value = service.getSentimentNew(sentence);
130 |
131 | if (value == null) {
132 | return;
133 | }
134 |
135 | flowFile = session.putAttribute(flowFile, ATTRIBUTE_OUTPUT_NAME, value);
136 |
137 | session.transfer(flowFile, REL_SUCCESS);
138 | session.commit();
139 | } catch (final Throwable t) {
140 | getLogger().error("Unable to process Sentiment Processor file " + t.getLocalizedMessage());
141 | getLogger().error("{} failed to process due to {}; rolling back session", new Object[] { this, t });
142 | throw t;
143 | }
144 | }
145 | }
--------------------------------------------------------------------------------
/nifi-corenlp-processors/src/main/java/com/dataflowdeveloper/processors/process/Location.java:
--------------------------------------------------------------------------------
1 | package com.dataflowdeveloper.processors.process;
2 |
3 | import java.io.Serializable;
4 |
/**
 * Serializable value holder for a single location string.
 *
 * @author tspann
 */
public class Location implements Serializable {

    private static final long serialVersionUID = -813050143597962280L;

    /** The location text; stays {@code null} until one is supplied. */
    private String location;

    /**
     * Creates a location with no value set.
     */
    public Location() {
        super();
    }

    /**
     * Creates a location holding the given text.
     *
     * @param location the location text to store
     */
    public Location(String location) {
        super();
        this.location = location;
    }

    /**
     * @return the stored location text, possibly {@code null}
     */
    public String getLocation() {
        return this.location;
    }

    /**
     * @param location the location text to store
     */
    public void setLocation(String location) {
        this.location = location;
    }

    /**
     * @return a string of the form {@code Location [location=<value>]}
     */
    @Override
    public String toString() {
        return "Location [location=" + this.location + "]";
    }
}
49 |
--------------------------------------------------------------------------------
/nifi-corenlp-processors/src/main/java/com/dataflowdeveloper/processors/process/PersonName.java:
--------------------------------------------------------------------------------
1 | package com.dataflowdeveloper.processors.process;
2 |
3 | import java.io.Serializable;
4 |
/**
 * Serializable value holder for a single person name.
 *
 * @author tspann
 */
public class PersonName implements Serializable {

    private static final long serialVersionUID = -864130114213352566L;

    /** The name text; defaults to the empty string. */
    private String name = "";

    /**
     * Creates a person name holding the default empty string.
     */
    public PersonName() {
        super();
    }

    /**
     * Creates a person name holding the given text.
     *
     * @param name the name to store
     */
    public PersonName(String name) {
        super();
        this.name = name;
    }

    /**
     * @return the stored name
     */
    public String getName() {
        return this.name;
    }

    /**
     * @param name the name to store
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return a string of the form {@code PersonName [name=<value>]}
     */
    @Override
    public String toString() {
        return "PersonName [name=" + this.name + "]";
    }
}
--------------------------------------------------------------------------------
/nifi-corenlp-processors/src/main/java/com/dataflowdeveloper/processors/process/SentimentService.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.dataflowdeveloper.processors.process;
5 |
6 | import java.util.Properties;
7 |
8 | import edu.stanford.nlp.ling.CoreAnnotations;
9 | import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
10 | import edu.stanford.nlp.pipeline.Annotation;
11 | import edu.stanford.nlp.pipeline.StanfordCoreNLP;
12 | import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
13 | import edu.stanford.nlp.sentiment.SentimentCoreAnnotations.SentimentAnnotatedTree;
14 | import edu.stanford.nlp.trees.Tree;
15 | import edu.stanford.nlp.util.CoreMap;
16 |
17 | /**
18 | * @author tspann
19 | *
20 | */
21 | public class SentimentService {
22 |
23 | private static final String DEFAULT_VALUE = "Neutral";
24 |
25 | /**
26 | * get stanford coreNLP sentiment analysis of sentence sent
27 | *
28 | * @param sentence
29 | * @return String of sentiment
30 | */
31 | public String getSentimentNew(String sentence) {
32 | if (sentence == null) {
33 | return DEFAULT_VALUE;
34 | }
35 | String output = DEFAULT_VALUE;
36 |
37 | if (sentence != null) {
38 | try {
39 | Properties props = new Properties();
40 | props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
41 | props.setProperty("parse.binaryTrees", "true");
42 | props.setProperty("enforceRequirements", "false");
43 | props.setProperty("debug", "false");
44 |
45 | StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
46 | if (sentence != null && sentence.length() > 0) {
47 | Annotation annotation = pipeline.process(sentence);
48 | for (CoreMap sentenceStructure : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
49 | output = sentenceStructure.get(SentimentCoreAnnotations.SentimentClass.class);
50 | }
51 | }
52 | } catch (Exception e) {
53 | e.printStackTrace();
54 | return DEFAULT_VALUE;
55 | }
56 | }
57 |
58 | return output;
59 | }
60 |
61 | /**
62 | * get stanford coreNLP sentiment analysis of sentence sent
63 | *
64 | * @param sentence
65 | * @return String of sentiment
66 | */
67 | public String getSentiment(String sentence) {
68 | if (sentence == null) {
69 | return "";
70 | }
71 | String output = "";
72 |
73 | if (sentence != null) {
74 | try {
75 | Properties props = new Properties();
76 | props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
77 | props.setProperty("parse.binaryTrees", "true");
78 | props.setProperty("enforceRequirements", "false");
79 | props.setProperty("debug", "false");
80 |
81 | StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
82 | int mainSentiment = 0;
83 | if (sentence != null && sentence.length() > 0) {
84 | int longest = 0;
85 | Annotation annotation = pipeline.process(sentence);
86 | for (CoreMap sentenceStructure : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
87 |
88 | String descr = sentenceStructure.get(SentimentCoreAnnotations.SentimentClass.class);
89 | System.out.println(descr);
90 |
91 | Tree tree = sentenceStructure.get(SentimentAnnotatedTree.class);
92 | int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
93 | String partText = sentence.toString();
94 | if (partText.length() > longest) {
95 | mainSentiment = sentiment;
96 | longest = partText.length();
97 | }
98 | }
99 | }
100 |
101 | String sentimentString = null;
102 |
103 | if (mainSentiment == 2 || mainSentiment > 4 || mainSentiment < 0) {
104 | sentimentString = "NEUTRAL";
105 | } else if (mainSentiment == 0 || mainSentiment == 1) {
106 | sentimentString = "NEGATIVE";
107 | } else {
108 | sentimentString = "POSITIVE";
109 | }
110 |
111 | output = sentimentString;
112 | } catch (Exception e) {
113 | e.printStackTrace();
114 | }
115 | }
116 |
117 | return output;
118 | }
119 |
120 | /**
121 | * tester
122 | *
123 | * @param args
124 | */
125 | public static void main(String[] args) {
126 |
127 | if (args == null || args.length <= 0) {
128 | System.out.println("No Data");
129 | return;
130 | }
131 |
132 | long start_time = System.currentTimeMillis();
133 | SentimentService service = new SentimentService();
134 |
135 | for (int j = 0; j < args.length; j++) {
136 | System.out.println("Input: " + args[j]);
137 |
138 | System.out.println("HAPPY SENT:" + service.getSentiment("This is a happy event that happened"));
139 | long end_time = System.currentTimeMillis();
140 | long difference = end_time - start_time;
141 | long seconds = (end_time - start_time) / 1000;
142 | System.out.println("Runtime:" + difference + " seconds " + seconds);
143 | System.out
144 | .println("SAD SENT:" + service.getSentiment("This is a very bad thing that happened and I am sad"));
145 | }
146 | }
147 |
148 | }
149 |
--------------------------------------------------------------------------------
/nifi-corenlp-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | com.dataflowdeveloper.processors.process.CoreNLPProcessor
16 |
--------------------------------------------------------------------------------
/nifi-corenlp-processors/src/test/java/com/dataflowdeveloper/processors/process/CoreNLPProcessorTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package com.dataflowdeveloper.processors.process;
18 |
19 | import static org.junit.Assert.assertNotNull;
20 |
21 | import java.io.File;
22 | import java.io.FileInputStream;
23 | import java.io.FileNotFoundException;
24 | import java.io.UnsupportedEncodingException;
25 | import java.util.List;
26 |
27 | import org.apache.nifi.components.PropertyDescriptor;
28 | import org.apache.nifi.processor.util.StandardValidators;
29 | import org.apache.nifi.util.MockFlowFile;
30 | import org.apache.nifi.util.TestRunner;
31 | import org.apache.nifi.util.TestRunners;
32 | import org.junit.Before;
33 | import org.junit.Test;
34 |
35 | public class CoreNLPProcessorTest {
36 |
37 | private TestRunner testRunner;
38 |
39 | public static final String ATTRIBUTE_INPUT_NAME = "sentence";
40 |
41 | public static final PropertyDescriptor MY_PROPERTY = new PropertyDescriptor
42 | .Builder().name(ATTRIBUTE_INPUT_NAME)
43 | .description("A sentence to analyze for sentiment, such as a Tweet.")
44 | .required(true)
45 | .expressionLanguageSupported(true)
46 | .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
47 | .build();
48 |
49 | @Before
50 | public void init() {
51 | testRunner = TestRunners.newTestRunner(CoreNLPProcessor.class);
52 | }
53 |
54 | @Test
55 | public void testProcessor() {
56 |
57 | testRunner.setProperty(MY_PROPERTY, "This is the worst unit test of sentiment analysis ever, just horrible. ");
58 |
59 | try {
60 | testRunner.enqueue(new FileInputStream(new File("src/test/resources/test.csv")));
61 | } catch (FileNotFoundException e) {
62 | e.printStackTrace();
63 | }
64 |
65 | testRunner.setValidateExpressionUsage(false);
66 | testRunner.run();
67 | testRunner.assertValid();
68 | List successFiles = testRunner.getFlowFilesForRelationship(CoreNLPProcessor.REL_SUCCESS);
69 |
70 | for (MockFlowFile mockFile : successFiles) {
71 | try {
72 | System.out.println("FILE:" + new String(mockFile.toByteArray(), "UTF-8"));
73 | System.out.println("Attribute: " + mockFile.getAttribute(CoreNLPProcessor.ATTRIBUTE_OUTPUT_NAME));
74 |
75 | assertNotNull( mockFile.getAttribute(CoreNLPProcessor.ATTRIBUTE_OUTPUT_NAME) );
76 | } catch (UnsupportedEncodingException e) {
77 | e.printStackTrace();
78 | }
79 | }
80 | }
81 |
82 | @Test
83 | public void testProcessorHappy() {
84 |
85 | testRunner.setProperty(MY_PROPERTY, "This is best use of Apache NiFi that I have ever seen, good job. ");
86 |
87 | try {
88 | testRunner.enqueue(new FileInputStream(new File("src/test/resources/test.csv")));
89 | } catch (FileNotFoundException e) {
90 | e.printStackTrace();
91 | }
92 |
93 | testRunner.setValidateExpressionUsage(false);
94 | testRunner.run();
95 | testRunner.assertValid();
96 | List successFiles = testRunner.getFlowFilesForRelationship(CoreNLPProcessor.REL_SUCCESS);
97 |
98 | for (MockFlowFile mockFile : successFiles) {
99 | try {
100 | System.out.println("FILE:" + new String(mockFile.toByteArray(), "UTF-8"));
101 | System.out.println("Attribute: " + mockFile.getAttribute(CoreNLPProcessor.ATTRIBUTE_OUTPUT_NAME));
102 |
103 | assertNotNull( mockFile.getAttribute(CoreNLPProcessor.ATTRIBUTE_OUTPUT_NAME) );
104 | } catch (UnsupportedEncodingException e) {
105 | e.printStackTrace();
106 | }
107 | }
108 | }
109 |
110 | @Test
111 | public void testProcessorNeutral() {
112 |
113 | testRunner.setProperty(MY_PROPERTY, "Cats are black.");
114 |
115 | try {
116 | testRunner.enqueue(new FileInputStream(new File("src/test/resources/test.csv")));
117 | } catch (FileNotFoundException e) {
118 | e.printStackTrace();
119 | }
120 |
121 | testRunner.setValidateExpressionUsage(false);
122 | testRunner.run();
123 | testRunner.assertValid();
124 | List successFiles = testRunner.getFlowFilesForRelationship(CoreNLPProcessor.REL_SUCCESS);
125 |
126 | for (MockFlowFile mockFile : successFiles) {
127 | try {
128 | System.out.println("FILE:" + new String(mockFile.toByteArray(), "UTF-8"));
129 | System.out.println("Attribute: " + mockFile.getAttribute(CoreNLPProcessor.ATTRIBUTE_OUTPUT_NAME));
130 |
131 | assertNotNull( mockFile.getAttribute(CoreNLPProcessor.ATTRIBUTE_OUTPUT_NAME) );
132 | } catch (UnsupportedEncodingException e) {
133 | e.printStackTrace();
134 | }
135 | }
136 | }
137 |
138 |
139 | }
140 |
--------------------------------------------------------------------------------
/nifi-corenlp-processors/src/test/resources/test.csv:
--------------------------------------------------------------------------------
1 | Header,Header2,Header3
2 | Value,Value2,Value3
3 | Value4,Value5,Value6
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
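16 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
17 |     <modelVersion>4.0.0</modelVersion>
18 |
19 |     <parent>
20 |         <groupId>org.apache.nifi</groupId>
21 |         <artifactId>nifi-nar-bundles</artifactId>
22 |         <version>1.6.0</version>
23 |     </parent>
24 |
25 |     <groupId>com.dataflowdeveloper</groupId>
26 |     <artifactId>corenlp-processor</artifactId>
27 |     <version>1.6</version>
28 |     <packaging>pom</packaging>
29 |
30 |     <modules>
31 |         <module>nifi-corenlp-processors</module>
32 |         <module>nifi-corenlp-nar</module>
33 |     </modules>
34 |
35 | </project>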
35 |
36 |
--------------------------------------------------------------------------------
/pushtogithub.sh:
--------------------------------------------------------------------------------
# Publish the local master branch to the "origin" remote and record it as
# the upstream tracking branch.
git push --set-upstream origin master
2 |
--------------------------------------------------------------------------------
/upload.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Copies the built CoreNLP NAR to the demo host over scp.
#
# The previous command uploaded nifi-attributecleaner-nar, an artifact that
# belongs to a different project; this repository builds nifi-corenlp-nar.
# The version is matched with a glob so the script does not need editing on
# every version bump.
#
# Optional environment overrides (defaults are the original values):
#   KEY_FILE       SSH identity file passed to scp -i
#   REMOTE_TARGET  scp destination in user@host:/path form
set -e

KEY_FILE="${KEY_FILE:-/Volumes/seagate/field.pem}"
REMOTE_TARGET="${REMOTE_TARGET:-centos@princeton1.field.hortonworks.com:/opt/demo}"

scp -i "$KEY_FILE" nifi-corenlp-nar/target/nifi-corenlp-nar-*.nar "$REMOTE_TARGET"
2 |
--------------------------------------------------------------------------------