├── .gitignore ├── LICENSE ├── README.md ├── build.sh ├── lib └── install.sh ├── models └── dl.sh ├── nifi-corenlp-nar ├── .classpath ├── .gitignore ├── .settings │ ├── org.eclipse.core.resources.prefs │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs └── pom.xml ├── nifi-corenlp-processors ├── .classpath ├── .gitignore ├── .settings │ ├── org.eclipse.core.resources.prefs │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── dataflowdeveloper │ │ │ └── processors │ │ │ └── process │ │ │ ├── CoreNLPProcessor.java │ │ │ ├── Location.java │ │ │ ├── PersonName.java │ │ │ └── SentimentService.java │ └── resources │ │ └── META-INF │ │ └── services │ │ └── org.apache.nifi.processor.Processor │ └── test │ ├── java │ └── com │ │ └── dataflowdeveloper │ │ └── processors │ │ └── process │ │ └── CoreNLPProcessorTest.java │ └── resources │ └── test.csv ├── pom.xml ├── pushtogithub.sh └── upload.sh /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | /target/*.* 3 | /.settings/ 4 | .DS_Store 5 | .springBeans 6 | .project 7 | /src/main/resources/application.yml 8 | /src/main/resources/application.properties 9 | application.* 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | deploy.sh 13 | 14 | # Package Files # 15 | *.jar 16 | *.war 17 | *.ear 18 | *.nar 19 | *.zip 20 | *.tar 21 | *.gz 22 | *.cz 23 | *.class 24 | 25 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 26 | hs_err_pid* 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nifi-corenlp-processor 2 | Apache NiFi Custom Processor for working with Stanford CoreNLP for Sentiment Analysis in Java 8 3 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | mvn install -DskipTests 2 | -------------------------------------------------------------------------------- /lib/install.sh: -------------------------------------------------------------------------------- 1 | mvn install:install-file -Dfile=vader.jar -DgroupId=com.vader -DartifactId=vader -Dversion=1.0 -Dpackaging=jar -DgeneratePom=true 2 | -------------------------------------------------------------------------------- /models/dl.sh: -------------------------------------------------------------------------------- 1 | wget http://nlp.stanford.edu/software/stanford-english-corenlp-2018-02-27-models.jar 2 | 
-------------------------------------------------------------------------------- /nifi-corenlp-nar/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /nifi-corenlp-nar/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /nifi-corenlp-nar/.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding/=UTF-8 3 | -------------------------------------------------------------------------------- /nifi-corenlp-nar/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 3 | org.eclipse.jdt.core.compiler.compliance=1.8 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.source=1.8 6 | -------------------------------------------------------------------------------- /nifi-corenlp-nar/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /nifi-corenlp-nar/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 16 | 17 | 4.0.0 18 | 19 | 20 | com.dataflowdeveloper 21 | corenlp-processor 22 | 1.6 23 | 24 | 25 | nifi-corenlp-nar 26 | nar 27 | 28 | true 29 | true 30 | 31 | 32 | 33 | 34 | 
com.dataflowdeveloper 35 | nifi-corenlp-processors 36 | 1.6 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /nifi-corenlp-processors/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /nifi-corenlp-processors/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /nifi-corenlp-processors/.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//src/main/java=UTF-8 3 | encoding//src/main/resources=UTF-8 4 | encoding//src/test/java=UTF-8 5 | encoding//src/test/resources=UTF-8 6 | encoding/=UTF-8 7 | -------------------------------------------------------------------------------- /nifi-corenlp-processors/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 3 | org.eclipse.jdt.core.compiler.compliance=1.8 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.source=1.8 6 | -------------------------------------------------------------------------------- /nifi-corenlp-processors/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | 
-------------------------------------------------------------------------------- /nifi-corenlp-processors/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 16 | 17 | 4.0.0 18 | 19 | 20 | com.dataflowdeveloper 21 | corenlp-processor 22 | 1.6 23 | 24 | 25 | nifi-corenlp-processors 26 | jar 27 | 28 | 29 | 30 | org.apache.nifi 31 | nifi-api 32 | 33 | 34 | org.apache.nifi 35 | nifi-processor-utils 36 | 1.6.0 37 | 38 | 39 | org.apache.nifi 40 | nifi-mock 41 | test 42 | 1.6.0 43 | 44 | 45 | org.slf4j 46 | slf4j-simple 47 | test 48 | 49 | 50 | junit 51 | junit 52 | test 53 | 54 | 55 | 56 | 57 | edu.stanford.nlp 58 | stanford-corenlp 59 | 3.9.1 60 | 61 | 62 | edu.stanford.nlp 63 | stanford-corenlp 64 | 3.9.1 65 | models 66 | 67 | 68 | com.google.code.gson 69 | gson 70 | 2.8.2 71 | 72 | 73 | 74 | 75 | jitpack.io 76 | https://jitpack.io 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /nifi-corenlp-processors/src/main/java/com/dataflowdeveloper/processors/process/CoreNLPProcessor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package com.dataflowdeveloper.processors.process; 18 | 19 | import java.util.ArrayList; 20 | import java.util.Collections; 21 | import java.util.HashSet; 22 | import java.util.List; 23 | import java.util.Set; 24 | 25 | import org.apache.nifi.annotation.behavior.ReadsAttribute; 26 | import org.apache.nifi.annotation.behavior.ReadsAttributes; 27 | import org.apache.nifi.annotation.behavior.WritesAttribute; 28 | import org.apache.nifi.annotation.behavior.WritesAttributes; 29 | import org.apache.nifi.annotation.documentation.CapabilityDescription; 30 | import org.apache.nifi.annotation.documentation.SeeAlso; 31 | import org.apache.nifi.annotation.documentation.Tags; 32 | import org.apache.nifi.annotation.lifecycle.OnScheduled; 33 | import org.apache.nifi.components.PropertyDescriptor; 34 | import org.apache.nifi.flowfile.FlowFile; 35 | import org.apache.nifi.processor.AbstractProcessor; 36 | import org.apache.nifi.processor.ProcessContext; 37 | import org.apache.nifi.processor.ProcessSession; 38 | import org.apache.nifi.processor.ProcessorInitializationContext; 39 | import org.apache.nifi.processor.Relationship; 40 | import org.apache.nifi.processor.exception.ProcessException; 41 | import org.apache.nifi.processor.util.StandardValidators; 42 | 43 | @Tags({ "corenlpprocessor" }) 44 | @CapabilityDescription("Run Stanford CoreNLP Sentiment Analysis") 45 | @SeeAlso({}) 46 | @ReadsAttributes({ @ReadsAttribute(attribute = "sentence", description = "sentence to analyze") }) 47 | @WritesAttributes({ 48 | @WritesAttribute(attribute = "sentiment", description = "Stanford CoreNLP sentiment analysis of that sentence.") }) 49 | public class CoreNLPProcessor extends AbstractProcessor { 50 | 51 | public static final String ATTRIBUTE_OUTPUT_NAME = "sentiment"; 52 | public static final String ATTRIBUTE_INPUT_NAME = "sentence"; 53 | public static final String 
PROPERTY_NAME_EXTRA = "Extra Resources"; 54 | 55 | public static final PropertyDescriptor MY_PROPERTY = new PropertyDescriptor.Builder().name(ATTRIBUTE_INPUT_NAME) 56 | .description("A sentence to parse, such as a Tweet.").required(true).expressionLanguageSupported(true) 57 | .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build(); 58 | 59 | public static final Relationship REL_SUCCESS = new Relationship.Builder().name("success") 60 | .description("Successfully determine sentiment.").build(); 61 | 62 | public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure") 63 | .description("Failed to determine sentiment.").build(); 64 | 65 | private List descriptors; 66 | 67 | private Set relationships; 68 | 69 | private SentimentService service; 70 | 71 | @Override 72 | protected void init(final ProcessorInitializationContext context) { 73 | final List descriptors = new ArrayList(); 74 | descriptors.add(MY_PROPERTY); 75 | this.descriptors = Collections.unmodifiableList(descriptors); 76 | 77 | final Set relationships = new HashSet(); 78 | relationships.add(REL_SUCCESS); 79 | relationships.add(REL_FAILURE); 80 | this.relationships = Collections.unmodifiableSet(relationships); 81 | } 82 | 83 | @Override 84 | public Set getRelationships() { 85 | return this.relationships; 86 | } 87 | 88 | @Override 89 | public final List getSupportedPropertyDescriptors() { 90 | return descriptors; 91 | } 92 | 93 | /** 94 | * initialize sentiment service 95 | */ 96 | private void initService() { 97 | service = new SentimentService(); 98 | } 99 | 100 | @OnScheduled 101 | public void onScheduled(final ProcessContext context) { 102 | initService(); 103 | return; 104 | } 105 | 106 | @Override 107 | public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { 108 | FlowFile flowFile = session.get(); 109 | if (flowFile == null) { 110 | flowFile = session.create(); 111 | } 112 | if (service == null) { 113 | initService(); 
114 | } 115 | try { 116 | flowFile.getAttributes(); 117 | 118 | String sentence = flowFile.getAttribute(ATTRIBUTE_INPUT_NAME); 119 | String sentence2 = context.getProperty(ATTRIBUTE_INPUT_NAME).evaluateAttributeExpressions(flowFile) 120 | .getValue(); 121 | 122 | if (sentence == null) { 123 | sentence = sentence2; 124 | } 125 | if (sentence == null) { 126 | return; 127 | } 128 | 129 | String value = service.getSentimentNew(sentence); 130 | 131 | if (value == null) { 132 | return; 133 | } 134 | 135 | flowFile = session.putAttribute(flowFile, ATTRIBUTE_OUTPUT_NAME, value); 136 | 137 | session.transfer(flowFile, REL_SUCCESS); 138 | session.commit(); 139 | } catch (final Throwable t) { 140 | getLogger().error("Unable to process Sentiment Processor file " + t.getLocalizedMessage()); 141 | getLogger().error("{} failed to process due to {}; rolling back session", new Object[] { this, t }); 142 | throw t; 143 | } 144 | } 145 | } -------------------------------------------------------------------------------- /nifi-corenlp-processors/src/main/java/com/dataflowdeveloper/processors/process/Location.java: -------------------------------------------------------------------------------- 1 | package com.dataflowdeveloper.processors.process; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | * 7 | * @author tspann 8 | * 9 | */ 10 | public class Location implements Serializable { 11 | 12 | private static final long serialVersionUID = -813050143597962280L; 13 | private String location = null; 14 | 15 | @Override 16 | public String toString() { 17 | StringBuilder builder = new StringBuilder(); 18 | builder.append("Location [location="); 19 | builder.append(location); 20 | builder.append("]"); 21 | return builder.toString(); 22 | } 23 | 24 | public String getLocation() { 25 | return location; 26 | } 27 | 28 | public void setLocation(String location) { 29 | this.location = location; 30 | } 31 | 32 | /** 33 | * @param location 34 | */ 35 | public Location(String location) { 36 | 
/**
 * Serializable holder for a single person name.
 *
 * @author tspann
 */
public class PersonName implements Serializable {

    private static final long serialVersionUID = -864130114213352566L;

    /** The wrapped name; an empty string until one is supplied. */
    private String name = "";

    /**
     * Creates a holder whose name is the empty string.
     */
    public PersonName() {
        super();
    }

    /**
     * Creates a holder for the given name.
     *
     * @param name the name to wrap
     */
    public PersonName(String name) {
        super();
        this.name = name;
    }

    /**
     * @return the wrapped name
     */
    public String getName() {
        return this.name;
    }

    /**
     * @param name the new name to wrap
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the name rendered as {@code PersonName [name=...]}
     */
    @Override
    public String toString() {
        return "PersonName [name=" + this.name + "]";
    }
}
edu.stanford.nlp.sentiment.SentimentCoreAnnotations.SentimentAnnotatedTree; 14 | import edu.stanford.nlp.trees.Tree; 15 | import edu.stanford.nlp.util.CoreMap; 16 | 17 | /** 18 | * @author tspann 19 | * 20 | */ 21 | public class SentimentService { 22 | 23 | private static final String DEFAULT_VALUE = "Neutral"; 24 | 25 | /** 26 | * get stanford coreNLP sentiment analysis of sentence sent 27 | * 28 | * @param sentence 29 | * @return String of sentiment 30 | */ 31 | public String getSentimentNew(String sentence) { 32 | if (sentence == null) { 33 | return DEFAULT_VALUE; 34 | } 35 | String output = DEFAULT_VALUE; 36 | 37 | if (sentence != null) { 38 | try { 39 | Properties props = new Properties(); 40 | props.setProperty("annotators", "tokenize, ssplit, parse, sentiment"); 41 | props.setProperty("parse.binaryTrees", "true"); 42 | props.setProperty("enforceRequirements", "false"); 43 | props.setProperty("debug", "false"); 44 | 45 | StanfordCoreNLP pipeline = new StanfordCoreNLP(props); 46 | if (sentence != null && sentence.length() > 0) { 47 | Annotation annotation = pipeline.process(sentence); 48 | for (CoreMap sentenceStructure : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { 49 | output = sentenceStructure.get(SentimentCoreAnnotations.SentimentClass.class); 50 | } 51 | } 52 | } catch (Exception e) { 53 | e.printStackTrace(); 54 | return DEFAULT_VALUE; 55 | } 56 | } 57 | 58 | return output; 59 | } 60 | 61 | /** 62 | * get stanford coreNLP sentiment analysis of sentence sent 63 | * 64 | * @param sentence 65 | * @return String of sentiment 66 | */ 67 | public String getSentiment(String sentence) { 68 | if (sentence == null) { 69 | return ""; 70 | } 71 | String output = ""; 72 | 73 | if (sentence != null) { 74 | try { 75 | Properties props = new Properties(); 76 | props.setProperty("annotators", "tokenize, ssplit, parse, sentiment"); 77 | props.setProperty("parse.binaryTrees", "true"); 78 | props.setProperty("enforceRequirements", "false"); 79 | 
props.setProperty("debug", "false"); 80 | 81 | StanfordCoreNLP pipeline = new StanfordCoreNLP(props); 82 | int mainSentiment = 0; 83 | if (sentence != null && sentence.length() > 0) { 84 | int longest = 0; 85 | Annotation annotation = pipeline.process(sentence); 86 | for (CoreMap sentenceStructure : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { 87 | 88 | String descr = sentenceStructure.get(SentimentCoreAnnotations.SentimentClass.class); 89 | System.out.println(descr); 90 | 91 | Tree tree = sentenceStructure.get(SentimentAnnotatedTree.class); 92 | int sentiment = RNNCoreAnnotations.getPredictedClass(tree); 93 | String partText = sentence.toString(); 94 | if (partText.length() > longest) { 95 | mainSentiment = sentiment; 96 | longest = partText.length(); 97 | } 98 | } 99 | } 100 | 101 | String sentimentString = null; 102 | 103 | if (mainSentiment == 2 || mainSentiment > 4 || mainSentiment < 0) { 104 | sentimentString = "NEUTRAL"; 105 | } else if (mainSentiment == 0 || mainSentiment == 1) { 106 | sentimentString = "NEGATIVE"; 107 | } else { 108 | sentimentString = "POSITIVE"; 109 | } 110 | 111 | output = sentimentString; 112 | } catch (Exception e) { 113 | e.printStackTrace(); 114 | } 115 | } 116 | 117 | return output; 118 | } 119 | 120 | /** 121 | * tester 122 | * 123 | * @param args 124 | */ 125 | public static void main(String[] args) { 126 | 127 | if (args == null || args.length <= 0) { 128 | System.out.println("No Data"); 129 | return; 130 | } 131 | 132 | long start_time = System.currentTimeMillis(); 133 | SentimentService service = new SentimentService(); 134 | 135 | for (int j = 0; j < args.length; j++) { 136 | System.out.println("Input: " + args[j]); 137 | 138 | System.out.println("HAPPY SENT:" + service.getSentiment("This is a happy event that happened")); 139 | long end_time = System.currentTimeMillis(); 140 | long difference = end_time - start_time; 141 | long seconds = (end_time - start_time) / 1000; 142 | System.out.println("Runtime:" + 
difference + " seconds " + seconds); 143 | System.out 144 | .println("SAD SENT:" + service.getSentiment("This is a very bad thing that happened and I am sad")); 145 | } 146 | } 147 | 148 | } 149 | -------------------------------------------------------------------------------- /nifi-corenlp-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | com.dataflowdeveloper.processors.process.CoreNLPProcessor 16 | -------------------------------------------------------------------------------- /nifi-corenlp-processors/src/test/java/com/dataflowdeveloper/processors/process/CoreNLPProcessorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package com.dataflowdeveloper.processors.process; 18 | 19 | import static org.junit.Assert.assertNotNull; 20 | 21 | import java.io.File; 22 | import java.io.FileInputStream; 23 | import java.io.FileNotFoundException; 24 | import java.io.UnsupportedEncodingException; 25 | import java.util.List; 26 | 27 | import org.apache.nifi.components.PropertyDescriptor; 28 | import org.apache.nifi.processor.util.StandardValidators; 29 | import org.apache.nifi.util.MockFlowFile; 30 | import org.apache.nifi.util.TestRunner; 31 | import org.apache.nifi.util.TestRunners; 32 | import org.junit.Before; 33 | import org.junit.Test; 34 | 35 | public class CoreNLPProcessorTest { 36 | 37 | private TestRunner testRunner; 38 | 39 | public static final String ATTRIBUTE_INPUT_NAME = "sentence"; 40 | 41 | public static final PropertyDescriptor MY_PROPERTY = new PropertyDescriptor 42 | .Builder().name(ATTRIBUTE_INPUT_NAME) 43 | .description("A sentence to analyze for sentiment, such as a Tweet.") 44 | .required(true) 45 | .expressionLanguageSupported(true) 46 | .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) 47 | .build(); 48 | 49 | @Before 50 | public void init() { 51 | testRunner = TestRunners.newTestRunner(CoreNLPProcessor.class); 52 | } 53 | 54 | @Test 55 | public void testProcessor() { 56 | 57 | testRunner.setProperty(MY_PROPERTY, "This is the worst unit test of 
sentiment analysis ever, just horrible. "); 58 | 59 | try { 60 | testRunner.enqueue(new FileInputStream(new File("src/test/resources/test.csv"))); 61 | } catch (FileNotFoundException e) { 62 | e.printStackTrace(); 63 | } 64 | 65 | testRunner.setValidateExpressionUsage(false); 66 | testRunner.run(); 67 | testRunner.assertValid(); 68 | List successFiles = testRunner.getFlowFilesForRelationship(CoreNLPProcessor.REL_SUCCESS); 69 | 70 | for (MockFlowFile mockFile : successFiles) { 71 | try { 72 | System.out.println("FILE:" + new String(mockFile.toByteArray(), "UTF-8")); 73 | System.out.println("Attribute: " + mockFile.getAttribute(CoreNLPProcessor.ATTRIBUTE_OUTPUT_NAME)); 74 | 75 | assertNotNull( mockFile.getAttribute(CoreNLPProcessor.ATTRIBUTE_OUTPUT_NAME) ); 76 | } catch (UnsupportedEncodingException e) { 77 | e.printStackTrace(); 78 | } 79 | } 80 | } 81 | 82 | @Test 83 | public void testProcessorHappy() { 84 | 85 | testRunner.setProperty(MY_PROPERTY, "This is best use of Apache NiFi that I have ever seen, good job. 
"); 86 | 87 | try { 88 | testRunner.enqueue(new FileInputStream(new File("src/test/resources/test.csv"))); 89 | } catch (FileNotFoundException e) { 90 | e.printStackTrace(); 91 | } 92 | 93 | testRunner.setValidateExpressionUsage(false); 94 | testRunner.run(); 95 | testRunner.assertValid(); 96 | List successFiles = testRunner.getFlowFilesForRelationship(CoreNLPProcessor.REL_SUCCESS); 97 | 98 | for (MockFlowFile mockFile : successFiles) { 99 | try { 100 | System.out.println("FILE:" + new String(mockFile.toByteArray(), "UTF-8")); 101 | System.out.println("Attribute: " + mockFile.getAttribute(CoreNLPProcessor.ATTRIBUTE_OUTPUT_NAME)); 102 | 103 | assertNotNull( mockFile.getAttribute(CoreNLPProcessor.ATTRIBUTE_OUTPUT_NAME) ); 104 | } catch (UnsupportedEncodingException e) { 105 | e.printStackTrace(); 106 | } 107 | } 108 | } 109 | 110 | @Test 111 | public void testProcessorNeutral() { 112 | 113 | testRunner.setProperty(MY_PROPERTY, "Cats are black."); 114 | 115 | try { 116 | testRunner.enqueue(new FileInputStream(new File("src/test/resources/test.csv"))); 117 | } catch (FileNotFoundException e) { 118 | e.printStackTrace(); 119 | } 120 | 121 | testRunner.setValidateExpressionUsage(false); 122 | testRunner.run(); 123 | testRunner.assertValid(); 124 | List successFiles = testRunner.getFlowFilesForRelationship(CoreNLPProcessor.REL_SUCCESS); 125 | 126 | for (MockFlowFile mockFile : successFiles) { 127 | try { 128 | System.out.println("FILE:" + new String(mockFile.toByteArray(), "UTF-8")); 129 | System.out.println("Attribute: " + mockFile.getAttribute(CoreNLPProcessor.ATTRIBUTE_OUTPUT_NAME)); 130 | 131 | assertNotNull( mockFile.getAttribute(CoreNLPProcessor.ATTRIBUTE_OUTPUT_NAME) ); 132 | } catch (UnsupportedEncodingException e) { 133 | e.printStackTrace(); 134 | } 135 | } 136 | } 137 | 138 | 139 | } 140 | -------------------------------------------------------------------------------- /nifi-corenlp-processors/src/test/resources/test.csv: 
-------------------------------------------------------------------------------- 1 | Header,Header2,Header3 2 | Value,Value2,Value3 3 | Value4,Value5,Value6 -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 16 | 17 | 4.0.0 18 | 19 | 20 | org.apache.nifi 21 | nifi-nar-bundles 22 | 1.6.0 23 | 24 | 25 | com.dataflowdeveloper 26 | corenlp-processor 27 | 1.6 28 | pom 29 | 30 | 31 | nifi-corenlp-processors 32 | nifi-corenlp-nar 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /pushtogithub.sh: -------------------------------------------------------------------------------- 1 | git push -u origin master 2 | -------------------------------------------------------------------------------- /upload.sh: -------------------------------------------------------------------------------- 1 | scp -i /Volumes/seagate/field.pem nifi-attributecleaner-nar/target/nifi-attributecleaner-nar-1.0.nar centos@princeton1.field.hortonworks.com:/opt/demo 2 | --------------------------------------------------------------------------------