├── .gitignore ├── Jenkinsfile ├── LICENSE ├── README.md ├── bin └── debug.sh ├── config ├── .gitignore └── connect-avro-docker.properties ├── docker-compose.yml ├── pom.xml └── src ├── main └── java │ └── com │ └── github │ └── jcustenborder │ └── kafka │ └── connect │ └── twitter │ ├── StatusConverter.java │ ├── TwitterSourceConnector.java │ ├── TwitterSourceConnectorConfig.java │ ├── TwitterSourceTask.java │ └── package-info.java └── test ├── java └── com │ └── github │ └── jcustenborder │ └── kafka │ └── connect │ └── twitter │ ├── DocumentationTest.java │ ├── SchemaGeneratorTest.java │ ├── StatusConverterTest.java │ ├── TwitterSourceConnectorTest.java │ └── TwitterSourceTaskTest.java └── resources └── logback.xml /.gitignore: -------------------------------------------------------------------------------- 1 | TwitterSourceConnector.properties 2 | *.iml 3 | /target/ 4 | .okhttpcache 5 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | #!groovy 2 | @Library('jenkins-pipeline') import com.github.jcustenborder.jenkins.pipeline.KafkaConnectPipeline 3 | 4 | def pipe = new KafkaConnectPipeline() 5 | pipe.execute() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This connector uses the Twitter Streaming API to listen for status update messages and 4 | convert them to a Kafka Connect struct on the fly. The goal is to match as much of the 5 | Twitter Status object as possible. 6 | 7 | # Configuration 8 | 9 | ## TwitterSourceConnector 10 | 11 | This Twitter Source connector is used to pull data from Twitter in real time. 
12 | 13 | ```properties 14 | name=connector1 15 | tasks.max=1 16 | connector.class=com.github.jcustenborder.kafka.connect.twitter.TwitterSourceConnector 17 | 18 | # Set these required values 19 | twitter.oauth.accessTokenSecret= 20 | process.deletes= 21 | filter.keywords= 22 | kafka.status.topic= 23 | kafka.delete.topic= 24 | twitter.oauth.consumerSecret= 25 | twitter.oauth.accessToken= 26 | twitter.oauth.consumerKey= 27 | ``` 28 | 29 | | Name | Description | Type | Default | Valid Values | Importance | 30 | |---------------------------------|---------------------------------------------------|----------|---------|--------------|------------| 31 | | filter.keywords | Twitter keywords to filter for. | list | | | high | 32 | | filter.userIds | Twitter user IDs to follow. | list | "" | | low | 33 | | kafka.delete.topic | Kafka topic to write delete events to. | string | | | high | 34 | | kafka.status.topic | Kafka topic to write the statuses to. | string | | | high | 35 | | process.deletes | Should this connector process deletes. | boolean | | | high | 36 | | twitter.oauth.accessToken | OAuth access token | password | | | high | 37 | | twitter.oauth.accessTokenSecret | OAuth access token secret | password | | | high | 38 | | twitter.oauth.consumerKey | OAuth consumer key | password | | | high | 39 | | twitter.oauth.consumerSecret | OAuth consumer secret | password | | | high | 40 | | twitter.debug | Flag to enable debug logging for the twitter api. 
| boolean | false | | low | 41 | 42 | 43 | # Schemas 44 | 45 | ## com.github.jcustenborder.kafka.connect.twitter.Place 46 | 47 | Returns the place attached to this status 48 | 49 | | Name | Optional | Schema | Default Value | Documentation | 50 | |---------------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------| 51 | | Name | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 52 | | StreetAddress | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 53 | | CountryCode | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 54 | | Id | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 55 | | Country | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 56 | | PlaceType | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 57 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 58 | | FullName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 59 | 60 | ## com.github.jcustenborder.kafka.connect.twitter.GeoLocation 61 | 62 | Returns The location that this tweet refers to if available. 
63 | 64 | | Name | Optional | Schema | Default Value | Documentation | 65 | |-----------|----------|---------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------| 66 | | Latitude | false | [Float64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#FLOAT64) | | returns the latitude of the geo location | 67 | | Longitude | false | [Float64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#FLOAT64) | | returns the longitude of the geo location | 68 | 69 | ## com.github.jcustenborder.kafka.connect.twitter.StatusDeletionNotice 70 | 71 | Message that is received when a status is deleted from Twitter. 72 | 73 | | Name | Optional | Schema | Default Value | Documentation | 74 | |----------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------| 75 | | StatusId | false | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | 76 | | UserId | false | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | 77 | 78 | ## com.github.jcustenborder.kafka.connect.twitter.StatusDeletionNoticeKey 79 | 80 | Key for a message that is received when a status is deleted from Twitter. 81 | 82 | | Name | Optional | Schema | Default Value | Documentation | 83 | |----------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------| 84 | | StatusId | false | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | 85 | 86 | ## com.github.jcustenborder.kafka.connect.twitter.StatusKey 87 | 88 | Key for a twitter status. 
89 | 90 | | Name | Optional | Schema | Default Value | Documentation | 91 | |------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------| 92 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | 93 | 94 | ## com.github.jcustenborder.kafka.connect.twitter.Status 95 | 96 | Twitter status message. 97 | 98 | | Name | Optional | Schema | Default Value | Documentation | 99 | |----------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------------------------------------------------------------------------------| 100 | | CreatedAt | true | [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Timestamp.html) | | Return the created_at | 101 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the status | 102 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the text of the status | 103 | | Source | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the source | 104 | | Truncated | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the status is truncated | 105 | | InReplyToStatusId | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the in_reply_tostatus_id | 106 | | InReplyToUserId | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the in_reply_user_id | 107 | | 
InReplyToScreenName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the in_reply_to_screen_name | 108 | | GeoLocation | true | [com.github.jcustenborder.kafka.connect.twitter.GeoLocation](#com.github.jcustenborder.kafka.connect.twitter.GeoLocation) | | Returns the location that this tweet refers to if available. | 109 | | Place | true | [com.github.jcustenborder.kafka.connect.twitter.Place](#com.github.jcustenborder.kafka.connect.twitter.Place) | | Returns the place attached to this status | 110 | | Favorited | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the status is favorited | 111 | | Retweeted | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the status is retweeted | 112 | | FavoriteCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Indicates approximately how many times this Tweet has been "favorited" by Twitter users. | 113 | | User | false | [com.github.jcustenborder.kafka.connect.twitter.User](#com.github.jcustenborder.kafka.connect.twitter.User) | | Returns the user associated with the status. 114 | This can be null if the instance is from User.getStatus(). | 115 | | Retweet | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 116 | | Contributors | false | Array of [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns an array of contributors, or null if no contributor is associated with this status. 
| 117 | | RetweetCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of times this tweet has been retweeted, or -1 when the tweet was created before this feature was enabled. | 118 | | RetweetedByMe | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 119 | | CurrentUserRetweetId | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the authenticating user's retweet's id of this tweet, or -1L when the tweet was created before this feature was enabled. | 120 | | PossiblySensitive | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 121 | | Lang | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the lang of the status text if available. | 122 | | WithheldInCountries | false | Array of [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the list of country codes where the tweet is withheld | 123 | | HashtagEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.HashtagEntity](#com.github.jcustenborder.kafka.connect.twitter.HashtagEntity) | | Returns an array of hashtags mentioned in the tweet. | 124 | | UserMentionEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity](#com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity) | | Returns an array of user mentions in the tweet. | 125 | | MediaEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.MediaEntity](#com.github.jcustenborder.kafka.connect.twitter.MediaEntity) | | Returns an array of MediaEntities if media are available in the tweet. 
| 126 | | SymbolEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.SymbolEntity](#com.github.jcustenborder.kafka.connect.twitter.SymbolEntity) | | Returns an array of SymbolEntities if medias are available in the tweet. | 127 | | URLEntities | true | Array of [com.github.jcustenborder.kafka.connect.twitter.URLEntity](#com.github.jcustenborder.kafka.connect.twitter.URLEntity) | | Returns an array if URLEntity mentioned in the tweet. | 128 | 129 | ## com.github.jcustenborder.kafka.connect.twitter.User 130 | 131 | Return the user associated with the status. 132 | This can be null if the instance is from User.getStatus(). 133 | 134 | | Name | Optional | Schema | Default Value | Documentation | 135 | |--------------------------------|----------|----------------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------------------------------------------| 136 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the user | 137 | | Name | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the name of the user | 138 | | ScreenName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the screen name of the user | 139 | | Location | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the location of the user | 140 | | Description | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the description of the user | 141 | | ContributorsEnabled | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Tests if the user is enabling 
contributors | 142 | | ProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the profile image url of the user | 143 | | BiggerProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 144 | | MiniProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 145 | | OriginalProfileImageURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 146 | | ProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 147 | | BiggerProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 148 | | MiniProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 149 | | OriginalProfileImageURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 150 | | DefaultProfileImage | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Tests if the user has not uploaded their own avatar | 151 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the url of the user | 152 | | Protected | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Test if the user status is protected | 153 | | FollowersCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of followers | 154 | | ProfileBackgroundColor | true | 
[String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 155 | | ProfileTextColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 156 | | ProfileLinkColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 157 | | ProfileSidebarFillColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 158 | | ProfileSidebarBorderColor | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 159 | | ProfileUseBackgroundImage | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 160 | | DefaultProfile | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Tests if the user has not altered the theme or background | 161 | | ShowAllInlineMedia | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 162 | | FriendsCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of users the user follows (AKA "followings") | 163 | | CreatedAt | true | [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Timestamp.html) | | | 164 | | FavouritesCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 165 | | UtcOffset | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 166 | | TimeZone | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 167 | | ProfileBackgroundImageURL | true | 
[String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 168 | | ProfileBackgroundImageUrlHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 169 | | ProfileBannerURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 170 | | ProfileBannerRetinaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 171 | | ProfileBannerIPadURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 172 | | ProfileBannerIPadRetinaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 173 | | ProfileBannerMobileURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 174 | | ProfileBannerMobileRetinaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 175 | | ProfileBackgroundTiled | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 176 | | Lang | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the preferred language of the user | 177 | | StatusesCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 178 | | GeoEnabled | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 179 | | Verified | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 180 | | Translator | true | 
[Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | | 181 | | ListedCount | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the number of public lists the user is listed on, or -1 if the count is unavailable. | 182 | | FollowRequestSent | true | [Boolean](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#BOOLEAN) | | Returns true if the authenticating user has requested to follow this user, otherwise false. | 183 | | WithheldInCountries | false | Array of [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the list of country codes where the user is withheld | 184 | 185 | ## com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant 186 | 187 | | Name | Optional | Schema | Default Value | Documentation | 188 | |-------------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------| 189 | | Url | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 190 | | Bitrate | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 191 | | ContentType | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 192 | 193 | ## com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size 194 | 195 | | Name | Optional | Schema | Default Value | Documentation | 196 | |--------|----------|-----------------------------------------------------------------------------------------------------|---------------|---------------| 197 | | Resize | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 198 | | Width | true | 
[Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 199 | | Height | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 200 | 201 | ## com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity 202 | 203 | | Name | Optional | Schema | Default Value | Documentation | 204 | |------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------| 205 | | VideoAspectRatioWidth | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 206 | | VideoAspectRatioHeight | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 207 | | VideoDurationMillis | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | 208 | | VideoVariants | true | Array of [com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant](#com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant) | | | 209 | | ExtAltText | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 210 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the media. | 211 | | Type | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media type photo, video, animated_gif. 
| 212 | | MediaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media URL. | 213 | | Sizes | false | Map of <[Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32), [com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size](#com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size)> | | Returns size variations of the media. | 214 | | MediaURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media secure URL. | 215 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | 216 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | 217 | | ExpandedURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the expanded URL if mentioned URL is shorten. | 218 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the URL mentioned in the tweet. | 219 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the URL mentioned in the tweet. | 220 | | DisplayURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the display URL if mentioned URL is shorten. 
| 221 | 222 | ## com.github.jcustenborder.kafka.connect.twitter.HashtagEntity 223 | 224 | | Name | Optional | Schema | Default Value | Documentation | 225 | |-------|----------|-------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------| 226 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the text of the hashtag without #. | 227 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the hashtag. | 228 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the hashtag. | 229 | 230 | ## com.github.jcustenborder.kafka.connect.twitter.MediaEntity 231 | 232 | | Name | Optional | Schema | Default Value | Documentation | 233 | |------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------| 234 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the id of the media. | 235 | | Type | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media type photo, video, animated_gif. | 236 | | MediaURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media URL. 
| 237 | | Sizes | false | Map of <[Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32), [com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size](#com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size)> | | | 238 | | MediaURLHttps | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the media secure URL. | 239 | | VideoAspectRatioWidth | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 240 | | VideoAspectRatioHeight | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | | 241 | | VideoDurationMillis | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | | 242 | | VideoVariants | true | Array of [com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant](#com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant) | | Returns size variations of the media. | 243 | | ExtAltText | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | | 244 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | 245 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | 246 | | ExpandedURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the expanded URL if mentioned URL is shorten. | 247 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the URL mentioned in the tweet. 
| 248 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the URL mentioned in the tweet. | 249 | | DisplayURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the display URL if mentioned URL is shorten. | 250 | 251 | ## com.github.jcustenborder.kafka.connect.twitter.SymbolEntity 252 | 253 | | Name | Optional | Schema | Default Value | Documentation | 254 | |-------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------------------------------------------------| 255 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the symbol. | 256 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the symbol. | 257 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the text of the entity | 258 | 259 | ## com.github.jcustenborder.kafka.connect.twitter.URLEntity 260 | 261 | | Name | Optional | Schema | Default Value | Documentation | 262 | |-------------|----------|-------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------| 263 | | URL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. | 264 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the URL mentioned in the tweet. 
| 265 | | ExpandedURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the expanded URL if mentioned URL is shorten. | 266 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the URL mentioned in the tweet. | 267 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the URL mentioned in the tweet. | 268 | | DisplayURL | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the display URL if mentioned URL is shorten. | 269 | 270 | ## com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity 271 | 272 | | Name | Optional | Schema | Default Value | Documentation | 273 | |------------|----------|-------------------------------------------------------------------------------------------------------|---------------|---------------------------------------------------------------| 274 | | Name | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the name mentioned in the status. | 275 | | Id | true | [Int64](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT64) | | Returns the user id mentioned in the status. | 276 | | Text | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the screen name mentioned in the status. | 277 | | ScreenName | true | [String](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#STRING) | | Returns the screen name mentioned in the status. 
| 278 | | Start | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the start character of the user mention. | 279 | | End | true | [Int32](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html#INT32) | | Returns the index of the end character of the user mention. | 280 | 281 | 282 | # Running in development 283 | 284 | ``` 285 | mvn clean package 286 | export CLASSPATH="$(find target/ -type f -name '*.jar'| grep '\-package' | tr '\n' ':')" 287 | $CONFLUENT_HOME/bin/connect-standalone config/connect-avro-docker.properties config/TwitterSourceConnector.properties 288 | ``` 289 | -------------------------------------------------------------------------------- /bin/debug.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | export DEBUG_SUSPEND_FLAG='y' 19 | export KAFKA_DEBUG='y' 20 | export JAVA_DEBUG_PORT='5006' 21 | set -e 22 | 23 | mvn clean package 24 | 25 | connect-standalone config/connect-avro-docker.properties /Users/jeremy/source/opensource/kafka-connect/kafka-connect-twitter/config/TwitterSourceConnector.properties -------------------------------------------------------------------------------- /config/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcustenborder/kafka-connect-twitter/6cb28cc8c4dc58703fe327af66a61285e6a0c247/config/.gitignore -------------------------------------------------------------------------------- /config/connect-avro-docker.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Sample configuration for a standalone Kafka Connect worker that uses Avro serialization and 18 | # integrates with the Schema Registry. This sample configuration assumes a local installation of 19 | # Confluent Platform with all services running on their default ports. 20 | # Bootstrap Kafka servers. If multiple servers are specified, they should be comma-separated. 
21 | bootstrap.servers=kafka:9092 22 | # The converters specify the format of data in Kafka and how to translate it into Connect data. 23 | # Every Connect user will need to configure these based on the format they want their data in 24 | # when loaded from or stored into Kafka 25 | key.converter=io.confluent.connect.avro.AvroConverter 26 | key.converter.schema.registry.url=http://schema-registry:8081 27 | value.converter=io.confluent.connect.avro.AvroConverter 28 | value.converter.schema.registry.url=http://schema-registry:8081 29 | # The internal converter used for offsets and config data is configurable and must be specified, 30 | # but most users will always want to use the built-in default. Offset and config data is never 31 | # visible outside of Connect in this format. 32 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter 33 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter 34 | internal.key.converter.schemas.enable=false 35 | internal.value.converter.schemas.enable=false 36 | # Local storage file for offset data 37 | offset.storage.file.filename=/tmp/connect.offsets 38 | # Confluent Control Center Integration -- uncomment these lines to enable Kafka client interceptors 39 | # that will report audit data that can be displayed and analyzed in Confluent Control Center 40 | # producer.interceptor.classes=io.confluent.monitoring.clients.interceptor.MonitoringProducerInterceptor 41 | # consumer.interceptor.classes=io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor 42 | rest.port=8089 43 | plugin.path=target/kafka-connect-target/usr/share/kafka-connect 44 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not 
use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | version: "2" 18 | services: 19 | zookeeper: 20 | image: confluentinc/cp-zookeeper:5.2.1 21 | ports: 22 | - "2181:2181" 23 | environment: 24 | ZOOKEEPER_CLIENT_PORT: 2181 25 | kafka: 26 | image: confluentinc/cp-kafka:5.2.1 27 | depends_on: 28 | - zookeeper 29 | ports: 30 | - "9092:9092" 31 | environment: 32 | KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181" 33 | KAFKA_ADVERTISED_LISTENERS: "plaintext://kafka:9092" 34 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 35 | schema-registry: 36 | image: confluentinc/cp-schema-registry:5.2.1 37 | depends_on: 38 | - kafka 39 | - zookeeper 40 | ports: 41 | - "8081:8081" 42 | environment: 43 | SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: "zookeeper:2181" 44 | SCHEMA_REGISTRY_HOST_NAME: schema-registry 45 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.github.jcustenborder.kafka.connect 8 | kafka-connect-parent 9 | 2.8.0-1 10 | 11 | kafka-connect-twitter 12 | 0.3-SNAPSHOT 13 | kafka-connect-twitter 14 | https://github.com/jcustenborder/kafka-connect-twitter 15 | 2016 16 | Kafka Connect plugin for streaming data from Twitter to Kafka. 
17 | 18 | 19 | Apache License 2.0 20 | https://github.com/jcustenborder/kafka-connect-twitter/LICENSE 21 | repo 22 | 23 | 24 | 25 | 26 | jcustenborder 27 | Jeremy Custenborder 28 | https://github.com/jcustenborder 29 | 30 | Committer 31 | 32 | 33 | 34 | 35 | scm:git:https://github.com/jcustenborder/kafka-connect-twitter.git 36 | scm:git:git@github.com:jcustenborder/kafka-connect-twitter.git 37 | https://github.com/jcustenborder/kafka-connect-twitter 38 | 39 | 40 | github 41 | https://github.com/jcustenborder/kafka-connect-twitter/issues 42 | 43 | 44 | 4.0.6 45 | 46 | 47 | 48 | org.twitter4j 49 | twitter4j-core 50 | ${twitter4j.version} 51 | 52 | 53 | org.twitter4j 54 | twitter4j-stream 55 | ${twitter4j.version} 56 | 57 | 58 | 59 | 60 | 61 | io.confluent 62 | kafka-connect-maven-plugin 63 | 64 | true 65 | https://jcustenborder.github.io/kafka-connect-documentation/ 66 | 67 | source 68 | 69 | 70 | Twitter 71 | Social 72 | 73 | Kafka Connect Twitter 74 | ${pom.issueManagement.url} 75 | Support provided through community involvement. 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/twitter/StatusConverter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.twitter; 17 | 18 | import org.apache.kafka.connect.data.Schema; 19 | import org.apache.kafka.connect.data.SchemaBuilder; 20 | import org.apache.kafka.connect.data.Struct; 21 | import org.apache.kafka.connect.data.Timestamp; 22 | import twitter4j.ExtendedMediaEntity; 23 | import twitter4j.GeoLocation; 24 | import twitter4j.HashtagEntity; 25 | import twitter4j.MediaEntity; 26 | import twitter4j.Place; 27 | import twitter4j.Status; 28 | import twitter4j.StatusDeletionNotice; 29 | import twitter4j.SymbolEntity; 30 | import twitter4j.URLEntity; 31 | import twitter4j.User; 32 | import twitter4j.UserMentionEntity; 33 | 34 | import java.util.ArrayList; 35 | import java.util.LinkedHashMap; 36 | import java.util.List; 37 | import java.util.Map; 38 | 39 | public class StatusConverter { 40 | 41 | 42 | public final static Schema PLACE_SCHEMA; 43 | public final static Schema GEO_LOCATION_SCHEMA; 44 | public static final Schema SCHEMA_STATUS_DELETION_NOTICE; 45 | public static final Schema SCHEMA_STATUS_DELETION_NOTICE_KEY; 46 | public static final Schema STATUS_SCHEMA_KEY; 47 | public static final Schema STATUS_SCHEMA; 48 | 49 | public static final Schema USER_SCHEMA = SchemaBuilder.struct() 50 | .name("com.github.jcustenborder.kafka.connect.twitter.User") 51 | .doc("Return the user associated with the status. 
This can be null if the instance is from User.getStatus().") 52 | .field("Id", SchemaBuilder.int64().doc("Returns the id of the user").optional().build()) 53 | .field("Name", SchemaBuilder.string().doc("Returns the name of the user").optional().build()) 54 | .field("ScreenName", SchemaBuilder.string().doc("Returns the screen name of the user").optional().build()) 55 | .field("Location", SchemaBuilder.string().doc("Returns the location of the user").optional().build()) 56 | .field("Description", SchemaBuilder.string().doc("Returns the description of the user").optional().build()) 57 | .field("ContributorsEnabled", SchemaBuilder.bool().doc("Tests if the user is enabling contributors").optional().build()) 58 | .field("ProfileImageURL", SchemaBuilder.string().doc("Returns the profile image url of the user").optional().build()) 59 | .field("BiggerProfileImageURL", SchemaBuilder.string().optional().build()) 60 | .field("MiniProfileImageURL", SchemaBuilder.string().optional().build()) 61 | .field("OriginalProfileImageURL", SchemaBuilder.string().optional().build()) 62 | .field("ProfileImageURLHttps", SchemaBuilder.string().optional().build()) 63 | .field("BiggerProfileImageURLHttps", SchemaBuilder.string().optional().build()) 64 | .field("MiniProfileImageURLHttps", SchemaBuilder.string().optional().build()) 65 | .field("OriginalProfileImageURLHttps", SchemaBuilder.string().optional().build()) 66 | .field("DefaultProfileImage", SchemaBuilder.bool().doc("Tests if the user has not uploaded their own avatar").optional().build()) 67 | .field("URL", SchemaBuilder.string().doc("Returns the url of the user").optional().build()) 68 | .field("Protected", SchemaBuilder.bool().doc("Test if the user status is protected").optional().build()) 69 | .field("FollowersCount", SchemaBuilder.int32().doc("Returns the number of followers").optional().build()) 70 | .field("ProfileBackgroundColor", SchemaBuilder.string().optional().build()) 71 | .field("ProfileTextColor", 
SchemaBuilder.string().optional().build()) 72 | .field("ProfileLinkColor", SchemaBuilder.string().optional().build()) 73 | .field("ProfileSidebarFillColor", SchemaBuilder.string().optional().build()) 74 | .field("ProfileSidebarBorderColor", SchemaBuilder.string().optional().build()) 75 | .field("ProfileUseBackgroundImage", SchemaBuilder.bool().optional().build()) 76 | .field("DefaultProfile", SchemaBuilder.bool().doc("Tests if the user has not altered the theme or background").optional().build()) 77 | .field("ShowAllInlineMedia", SchemaBuilder.bool().optional().build()) 78 | .field("FriendsCount", SchemaBuilder.int32().doc("Returns the number of users the user follows (AKA \"followings\")").optional().build()) 79 | .field("CreatedAt", Timestamp.builder().optional().build()) 80 | .field("FavouritesCount", SchemaBuilder.int32().optional().build()) 81 | .field("UtcOffset", SchemaBuilder.int32().optional().build()) 82 | .field("TimeZone", SchemaBuilder.string().optional().build()) 83 | .field("ProfileBackgroundImageURL", SchemaBuilder.string().optional().build()) 84 | .field("ProfileBackgroundImageUrlHttps", SchemaBuilder.string().optional().build()) 85 | .field("ProfileBannerURL", SchemaBuilder.string().optional().build()) 86 | .field("ProfileBannerRetinaURL", SchemaBuilder.string().optional().build()) 87 | .field("ProfileBannerIPadURL", SchemaBuilder.string().optional().build()) 88 | .field("ProfileBannerIPadRetinaURL", SchemaBuilder.string().optional().build()) 89 | .field("ProfileBannerMobileURL", SchemaBuilder.string().optional().build()) 90 | .field("ProfileBannerMobileRetinaURL", SchemaBuilder.string().optional().build()) 91 | .field("ProfileBackgroundTiled", SchemaBuilder.bool().optional().build()) 92 | .field("Lang", SchemaBuilder.string().doc("Returns the preferred language of the user").optional().build()) 93 | .field("StatusesCount", SchemaBuilder.int32().optional().build()) 94 | .field("GeoEnabled", SchemaBuilder.bool().optional().build()) 95 | 
.field("Verified", SchemaBuilder.bool().optional().build()) 96 | .field("Translator", SchemaBuilder.bool().optional().build()) 97 | .field("ListedCount", SchemaBuilder.int32().doc("Returns the number of public lists the user is listed on, or -1 if the count is unavailable.").optional().build()) 98 | .field("FollowRequestSent", SchemaBuilder.bool().doc("Returns true if the authenticating user has requested to follow this user, otherwise false.").optional().build()) 99 | .field("WithheldInCountries", SchemaBuilder.array(Schema.STRING_SCHEMA).doc("Returns the list of country codes where the user is withheld").build()) 100 | .build(); 101 | 102 | static { 103 | PLACE_SCHEMA = SchemaBuilder.struct() 104 | .name("com.github.jcustenborder.kafka.connect.twitter.Place") 105 | .optional() 106 | .doc("Returns the place attached to this status") 107 | .field("Name", SchemaBuilder.string().optional().build()) 108 | .field("StreetAddress", SchemaBuilder.string().optional().build()) 109 | .field("CountryCode", SchemaBuilder.string().optional().build()) 110 | .field("Id", SchemaBuilder.string().optional().build()) 111 | .field("Country", SchemaBuilder.string().optional().build()) 112 | .field("PlaceType", SchemaBuilder.string().optional().build()) 113 | .field("URL", SchemaBuilder.string().optional().build()) 114 | .field("FullName", SchemaBuilder.string().optional().build()) 115 | .build(); 116 | } 117 | 118 | static { 119 | GEO_LOCATION_SCHEMA = SchemaBuilder.struct() 120 | .name("com.github.jcustenborder.kafka.connect.twitter.GeoLocation") 121 | .optional() 122 | .doc("Returns The location that this tweet refers to if available.") 123 | .field("Latitude", SchemaBuilder.float64().doc("returns the latitude of the geo location").build()) 124 | .field("Longitude", SchemaBuilder.float64().doc("returns the longitude of the geo location").build()) 125 | .build(); 126 | } 127 | 128 | static { 129 | STATUS_SCHEMA_KEY = SchemaBuilder.struct() 130 | 
.name("com.github.jcustenborder.kafka.connect.twitter.StatusKey") 131 | .doc("Key for a twitter status.") 132 | .field("Id", Schema.OPTIONAL_INT64_SCHEMA) 133 | .build(); 134 | } 135 | 136 | public static final Schema SCHEMA_MEDIA_ENTITY_VARIANT = SchemaBuilder.struct() 137 | .name("com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity.Variant") 138 | .doc("") 139 | .field("Url", SchemaBuilder.string().optional().doc("").build()) 140 | .field("Bitrate", SchemaBuilder.int32().optional().doc("").build()) 141 | .field("ContentType", SchemaBuilder.string().optional().doc("").build()) 142 | .build(); 143 | public static final Schema SCHEMA_MEDIA_ENTITY_SIZE = SchemaBuilder.struct() 144 | .name("com.github.jcustenborder.kafka.connect.twitter.MediaEntity.Size") 145 | .doc("") 146 | .field("Resize", SchemaBuilder.int32().optional().doc("").build()) 147 | .field("Width", SchemaBuilder.int32().optional().doc("").build()) 148 | .field("Height", SchemaBuilder.int32().optional().doc("").build()) 149 | .build(); 150 | public static final Schema SCHEMA_EXTENDED_MEDIA_ENTITY = SchemaBuilder.struct() 151 | .name("com.github.jcustenborder.kafka.connect.twitter.ExtendedMediaEntity") 152 | .doc("") 153 | .field("VideoAspectRatioWidth", SchemaBuilder.int32().optional().doc("").build()) 154 | .field("VideoAspectRatioHeight", SchemaBuilder.int32().optional().doc("").build()) 155 | .field("VideoDurationMillis", SchemaBuilder.int64().optional().doc("").build()) 156 | .field("VideoVariants", SchemaBuilder.array(SCHEMA_MEDIA_ENTITY_VARIANT).optional().doc("").build()) 157 | .field("ExtAltText", SchemaBuilder.string().optional().doc("").build()) 158 | .field("Id", SchemaBuilder.int64().optional().doc("Returns the id of the media.").build()) 159 | .field("Type", SchemaBuilder.string().optional().doc("Returns the media type photo, video, animated_gif.").build()) 160 | .field("MediaURL", SchemaBuilder.string().optional().doc("Returns the media URL.").build()) 161 | .field("Sizes", 
SchemaBuilder.map(Schema.INT32_SCHEMA, SCHEMA_MEDIA_ENTITY_SIZE).doc("Returns size variations of the media.").build()) 162 | .field("MediaURLHttps", SchemaBuilder.string().optional().doc("Returns the media secure URL.").build()) 163 | .field("URL", SchemaBuilder.string().optional().doc("Returns the URL mentioned in the tweet.").build()) 164 | .field("Text", SchemaBuilder.string().optional().doc("Returns the URL mentioned in the tweet.").build()) 165 | .field("ExpandedURL", SchemaBuilder.string().optional().doc("Returns the expanded URL if mentioned URL is shorten.").build()) 166 | .field("Start", SchemaBuilder.int32().optional().doc("Returns the index of the start character of the URL mentioned in the tweet.").build()) 167 | .field("End", SchemaBuilder.int32().optional().doc("Returns the index of the end character of the URL mentioned in the tweet.").build()) 168 | .field("DisplayURL", SchemaBuilder.string().optional().doc("Returns the display URL if mentioned URL is shorten.").build()) 169 | .build(); 170 | public static final Schema SCHEMA_HASHTAG_ENTITY = SchemaBuilder.struct() 171 | .name("com.github.jcustenborder.kafka.connect.twitter.HashtagEntity") 172 | .doc("") 173 | .field("Text", SchemaBuilder.string().optional().doc("Returns the text of the hashtag without #.").build()) 174 | .field("Start", SchemaBuilder.int32().optional().doc("Returns the index of the start character of the hashtag.").build()) 175 | .field("End", SchemaBuilder.int32().optional().doc("Returns the index of the end character of the hashtag.").build()) 176 | .build(); 177 | public static final Schema SCHEMA_MEDIA_ENTITY = SchemaBuilder.struct() 178 | .name("com.github.jcustenborder.kafka.connect.twitter.MediaEntity") 179 | .doc("") 180 | .field("Id", SchemaBuilder.int64().optional().doc("Returns the id of the media.").build()) 181 | .field("Type", SchemaBuilder.string().optional().doc("Returns the media type photo, video, animated_gif.").build()) 182 | .field("MediaURL", 
SchemaBuilder.string().optional().doc("Returns the media URL.").build()) 183 | .field("Sizes", SchemaBuilder.map(Schema.INT32_SCHEMA, SCHEMA_MEDIA_ENTITY_SIZE)) 184 | .field("MediaURLHttps", SchemaBuilder.string().optional().doc("Returns the media secure URL.").build()) 185 | .field("VideoAspectRatioWidth", SchemaBuilder.int32().optional().doc("").build()) 186 | .field("VideoAspectRatioHeight", SchemaBuilder.int32().optional().doc("").build()) 187 | .field("VideoDurationMillis", SchemaBuilder.int64().optional().doc("").build()) 188 | .field("VideoVariants", SchemaBuilder.array(SCHEMA_MEDIA_ENTITY_VARIANT).optional().doc("Returns size variations of the media.").build()) 189 | .field("ExtAltText", SchemaBuilder.string().optional().doc("").build()) 190 | .field("URL", SchemaBuilder.string().optional().doc("Returns the URL mentioned in the tweet.").build()) 191 | .field("Text", SchemaBuilder.string().optional().doc("Returns the URL mentioned in the tweet.").build()) 192 | .field("ExpandedURL", SchemaBuilder.string().optional().doc("Returns the expanded URL if mentioned URL is shorten.").build()) 193 | .field("Start", SchemaBuilder.int32().optional().doc("Returns the index of the start character of the URL mentioned in the tweet.").build()) 194 | .field("End", SchemaBuilder.int32().optional().doc("Returns the index of the end character of the URL mentioned in the tweet.").build()) 195 | .field("DisplayURL", SchemaBuilder.string().optional().doc("Returns the display URL if mentioned URL is shorten.").build()) 196 | .build(); 197 | public static final Schema SCHEMA_SYMBOL_ENTITY = SchemaBuilder.struct() 198 | .name("com.github.jcustenborder.kafka.connect.twitter.SymbolEntity") 199 | .doc("") 200 | .field("Start", SchemaBuilder.int32().optional().doc("Returns the index of the start character of the symbol.").build()) 201 | .field("End", SchemaBuilder.int32().optional().doc("Returns the index of the end character of the symbol.").build()) 202 | .field("Text", 
SchemaBuilder.string().optional().doc("Returns the text of the entity").build()) 203 | .build(); 204 | public static final Schema SCHEMA_URL_ENTITY = SchemaBuilder.struct() 205 | .name("com.github.jcustenborder.kafka.connect.twitter.URLEntity") 206 | .doc("") 207 | .field("URL", SchemaBuilder.string().optional().doc("Returns the URL mentioned in the tweet.").build()) 208 | .field("Text", SchemaBuilder.string().optional().doc("Returns the URL mentioned in the tweet.").build()) 209 | .field("ExpandedURL", SchemaBuilder.string().optional().doc("Returns the expanded URL if mentioned URL is shorten.").build()) 210 | .field("Start", SchemaBuilder.int32().optional().doc("Returns the index of the start character of the URL mentioned in the tweet.").build()) 211 | .field("End", SchemaBuilder.int32().optional().doc("Returns the index of the end character of the URL mentioned in the tweet.").build()) 212 | .field("DisplayURL", SchemaBuilder.string().optional().doc("Returns the display URL if mentioned URL is shorten.").build()) 213 | .build(); 214 | public static final Schema SCHEMA_USER_MENTION_ENTITY = SchemaBuilder.struct() 215 | .name("com.github.jcustenborder.kafka.connect.twitter.UserMentionEntity") 216 | .doc("") 217 | .field("Name", SchemaBuilder.string().optional().doc("Returns the name mentioned in the status.").build()) 218 | .field("Id", SchemaBuilder.int64().optional().doc("Returns the user id mentioned in the status.").build()) 219 | .field("Text", SchemaBuilder.string().optional().doc("Returns the screen name mentioned in the status.").build()) 220 | .field("ScreenName", SchemaBuilder.string().optional().doc("Returns the screen name mentioned in the status.").build()) 221 | .field("Start", SchemaBuilder.int32().optional().doc("Returns the index of the start character of the user mention.").build()) 222 | .field("End", SchemaBuilder.int32().optional().doc("Returns the index of the end character of the user mention.").build()) 223 | .build(); 224 | 225 | static { 
226 | STATUS_SCHEMA = SchemaBuilder.struct() 227 | .name("com.github.jcustenborder.kafka.connect.twitter.Status") 228 | .doc("Twitter status message.") 229 | .field("CreatedAt", Timestamp.builder().doc("Return the created_at").optional().build()) 230 | .field("Id", SchemaBuilder.int64().doc("Returns the id of the status").optional().build()) 231 | .field("Text", SchemaBuilder.string().doc("Returns the text of the status").optional().build()) 232 | .field("Source", SchemaBuilder.string().doc("Returns the source").optional().build()) 233 | .field("Truncated", SchemaBuilder.bool().doc("Test if the status is truncated").optional().build()) 234 | .field("InReplyToStatusId", SchemaBuilder.int64().doc("Returns the in_reply_tostatus_id").optional().build()) 235 | .field("InReplyToUserId", SchemaBuilder.int64().doc("Returns the in_reply_user_id").optional().build()) 236 | .field("InReplyToScreenName", SchemaBuilder.string().doc("Returns the in_reply_to_screen_name").optional().build()) 237 | .field("GeoLocation", GEO_LOCATION_SCHEMA) 238 | .field("Place", PLACE_SCHEMA) 239 | .field("Favorited", SchemaBuilder.bool().doc("Test if the status is favorited").optional().build()) 240 | .field("Retweeted", SchemaBuilder.bool().doc("Test if the status is retweeted").optional().build()) 241 | .field("FavoriteCount", SchemaBuilder.int32().doc("Indicates approximately how many times this Tweet has been \"favorited\" by Twitter users.").optional().build()) 242 | .field("User", USER_SCHEMA) 243 | .field("Retweet", SchemaBuilder.bool().optional().build()) 244 | .field("Contributors", SchemaBuilder.array(Schema.INT64_SCHEMA).doc("Returns an array of contributors, or null if no contributor is associated with this status.").build()) 245 | .field("RetweetCount", SchemaBuilder.int32().doc("Returns the number of times this tweet has been retweeted, or -1 when the tweet was created before this feature was enabled.").optional().build()) 246 | .field("RetweetedByMe", 
SchemaBuilder.bool().optional().build()) 247 | .field("CurrentUserRetweetId", SchemaBuilder.int64().doc("Returns the authenticating user's retweet's id of this tweet, or -1L when the tweet was created before this feature was enabled.").optional().build()) 248 | .field("PossiblySensitive", SchemaBuilder.bool().optional().build()) 249 | .field("Lang", SchemaBuilder.string().doc("Returns the lang of the status text if available.").optional().build()) 250 | .field("WithheldInCountries", SchemaBuilder.array(Schema.STRING_SCHEMA).doc("Returns the list of country codes where the tweet is withheld").build()) 251 | .field("HashtagEntities", SchemaBuilder.array(SCHEMA_HASHTAG_ENTITY).doc("Returns an array if hashtag mentioned in the tweet.").optional().build()) 252 | .field("UserMentionEntities", SchemaBuilder.array(SCHEMA_USER_MENTION_ENTITY).doc("Returns an array of user mentions in the tweet.").optional().build()) 253 | .field("MediaEntities", SchemaBuilder.array(SCHEMA_MEDIA_ENTITY).doc("Returns an array of MediaEntities if medias are available in the tweet.").optional().build()) 254 | .field("SymbolEntities", SchemaBuilder.array(SCHEMA_SYMBOL_ENTITY).doc("Returns an array of SymbolEntities if medias are available in the tweet.").optional().build()) 255 | .field("URLEntities", SchemaBuilder.array(SCHEMA_URL_ENTITY).doc("Returns an array if URLEntity mentioned in the tweet.").optional().build()) 256 | 257 | .build(); 258 | } 259 | 260 | static { 261 | SCHEMA_STATUS_DELETION_NOTICE = SchemaBuilder.struct() 262 | .name("com.github.jcustenborder.kafka.connect.twitter.StatusDeletionNotice") 263 | .doc("Message that is received when a status is deleted from Twitter.") 264 | .field("StatusId", Schema.INT64_SCHEMA) 265 | .field("UserId", Schema.INT64_SCHEMA) 266 | .build(); 267 | } 268 | 269 | static { 270 | SCHEMA_STATUS_DELETION_NOTICE_KEY = SchemaBuilder.struct() 271 | .name("com.github.jcustenborder.kafka.connect.twitter.StatusDeletionNoticeKey") 272 | .doc("Key for a 
message that is received when a status is deleted from Twitter.") 273 | .field("StatusId", Schema.INT64_SCHEMA) 274 | .build(); 275 | } 276 | 277 | static Map convertSizes(Map items) { 278 | Map results = new LinkedHashMap<>(); 279 | 280 | if (items == null) { 281 | return results; 282 | } 283 | 284 | for (Map.Entry kvp : items.entrySet()) { 285 | results.put(kvp.getKey(), convertMediaEntitySize(kvp.getValue())); 286 | } 287 | 288 | return results; 289 | } 290 | 291 | public static void convert(User user, Struct struct) { 292 | struct 293 | .put("Id", user.getId()) 294 | .put("Name", user.getName()) 295 | .put("ScreenName", user.getScreenName()) 296 | .put("Location", user.getLocation()) 297 | .put("Description", user.getDescription()) 298 | .put("ContributorsEnabled", user.isContributorsEnabled()) 299 | .put("ProfileImageURL", user.getProfileImageURL()) 300 | .put("BiggerProfileImageURL", user.getBiggerProfileImageURL()) 301 | .put("MiniProfileImageURL", user.getMiniProfileImageURL()) 302 | .put("OriginalProfileImageURL", user.getOriginalProfileImageURL()) 303 | .put("ProfileImageURLHttps", user.getProfileImageURLHttps()) 304 | .put("BiggerProfileImageURLHttps", user.getBiggerProfileImageURLHttps()) 305 | .put("MiniProfileImageURLHttps", user.getMiniProfileImageURLHttps()) 306 | .put("OriginalProfileImageURLHttps", user.getOriginalProfileImageURLHttps()) 307 | .put("DefaultProfileImage", user.isDefaultProfileImage()) 308 | .put("URL", user.getURL()) 309 | .put("Protected", user.isProtected()) 310 | .put("FollowersCount", user.getFollowersCount()) 311 | .put("ProfileBackgroundColor", user.getProfileBackgroundColor()) 312 | .put("ProfileTextColor", user.getProfileTextColor()) 313 | .put("ProfileLinkColor", user.getProfileLinkColor()) 314 | .put("ProfileSidebarFillColor", user.getProfileSidebarFillColor()) 315 | .put("ProfileSidebarBorderColor", user.getProfileSidebarBorderColor()) 316 | .put("ProfileUseBackgroundImage", user.isProfileUseBackgroundImage()) 317 | 
.put("DefaultProfile", user.isDefaultProfile()) 318 | .put("ShowAllInlineMedia", user.isShowAllInlineMedia()) 319 | .put("FriendsCount", user.getFriendsCount()) 320 | .put("CreatedAt", user.getCreatedAt()) 321 | .put("FavouritesCount", user.getFavouritesCount()) 322 | .put("UtcOffset", user.getUtcOffset()) 323 | .put("TimeZone", user.getTimeZone()) 324 | .put("ProfileBackgroundImageURL", user.getProfileBackgroundImageURL()) 325 | .put("ProfileBackgroundImageUrlHttps", user.getProfileBackgroundImageUrlHttps()) 326 | .put("ProfileBannerURL", user.getProfileBannerURL()) 327 | .put("ProfileBannerRetinaURL", user.getProfileBannerRetinaURL()) 328 | .put("ProfileBannerIPadURL", user.getProfileBannerIPadURL()) 329 | .put("ProfileBannerIPadRetinaURL", user.getProfileBannerIPadRetinaURL()) 330 | .put("ProfileBannerMobileURL", user.getProfileBannerMobileURL()) 331 | .put("ProfileBannerMobileRetinaURL", user.getProfileBannerMobileRetinaURL()) 332 | .put("ProfileBackgroundTiled", user.isProfileBackgroundTiled()) 333 | .put("Lang", user.getLang()) 334 | .put("StatusesCount", user.getStatusesCount()) 335 | .put("GeoEnabled", user.isGeoEnabled()) 336 | .put("Verified", user.isVerified()) 337 | .put("Translator", user.isTranslator()) 338 | .put("ListedCount", user.getListedCount()) 339 | .put("FollowRequestSent", user.isFollowRequestSent()); 340 | 341 | List withheldInCountries = new ArrayList<>(); 342 | if (null != user.getWithheldInCountries()) { 343 | for (String s : user.getWithheldInCountries()) { 344 | withheldInCountries.add(s); 345 | } 346 | } 347 | struct.put("WithheldInCountries", withheldInCountries); 348 | 349 | } 350 | 351 | public static void convert(Place place, Struct struct) { 352 | if (null == place) { 353 | return; 354 | } 355 | struct.put("Name", place.getName()) 356 | .put("StreetAddress", place.getStreetAddress()) 357 | .put("CountryCode", place.getCountryCode()) 358 | .put("Id", place.getId()) 359 | .put("Country", place.getCountry()) 360 | .put("PlaceType", 
place.getPlaceType()) 361 | .put("URL", place.getURL()) 362 | .put("FullName", place.getFullName()); 363 | } 364 | 365 | public static void convert(GeoLocation geoLocation, Struct struct) { 366 | if (null == geoLocation) { 367 | return; 368 | } 369 | struct.put("Latitude", geoLocation.getLatitude()) 370 | .put("Longitude", geoLocation.getLongitude()); 371 | } 372 | 373 | 374 | static Struct convertMediaEntityVariant(MediaEntity.Variant variant) { 375 | return new Struct(SCHEMA_MEDIA_ENTITY_VARIANT) 376 | .put("Url", variant.getUrl()) 377 | .put("Bitrate", variant.getBitrate()) 378 | .put("ContentType", variant.getContentType()); 379 | } 380 | 381 | public static List convert(MediaEntity.Variant[] items) { 382 | List result = new ArrayList<>(); 383 | if (null == items) { 384 | return result; 385 | } 386 | for (MediaEntity.Variant item : items) { 387 | Struct struct = convertMediaEntityVariant(item); 388 | result.add(struct); 389 | } 390 | return result; 391 | } 392 | 393 | 394 | static Struct convertMediaEntitySize(MediaEntity.Size size) { 395 | return new Struct(SCHEMA_MEDIA_ENTITY_SIZE) 396 | .put("Resize", size.getResize()) 397 | .put("Width", size.getWidth()) 398 | .put("Height", size.getHeight()); 399 | } 400 | 401 | public static List convert(MediaEntity.Size[] items) { 402 | List result = new ArrayList<>(); 403 | if (null == items) { 404 | return result; 405 | } 406 | for (MediaEntity.Size item : items) { 407 | Struct struct = convertMediaEntitySize(item); 408 | result.add(struct); 409 | } 410 | return result; 411 | } 412 | 413 | 414 | static Struct convertExtendedMediaEntity(ExtendedMediaEntity extendedMediaEntity) { 415 | return new Struct(SCHEMA_EXTENDED_MEDIA_ENTITY) 416 | .put("VideoAspectRatioWidth", extendedMediaEntity.getVideoAspectRatioWidth()) 417 | .put("VideoAspectRatioHeight", extendedMediaEntity.getVideoAspectRatioHeight()) 418 | .put("VideoDurationMillis", extendedMediaEntity.getVideoDurationMillis()) 419 | .put("VideoVariants", 
extendedMediaEntity.getVideoVariants()) 420 | .put("ExtAltText", extendedMediaEntity.getExtAltText()) 421 | .put("Id", extendedMediaEntity.getId()) 422 | .put("Type", extendedMediaEntity.getType()) 423 | .put("MediaURL", extendedMediaEntity.getMediaURL()) 424 | .put("Sizes", extendedMediaEntity.getSizes()) 425 | .put("MediaURLHttps", extendedMediaEntity.getMediaURLHttps()) 426 | .put("URL", extendedMediaEntity.getURL()) 427 | .put("Text", extendedMediaEntity.getText()) 428 | .put("ExpandedURL", extendedMediaEntity.getExpandedURL()) 429 | .put("Start", extendedMediaEntity.getStart()) 430 | .put("End", extendedMediaEntity.getEnd()) 431 | .put("DisplayURL", extendedMediaEntity.getDisplayURL()); 432 | } 433 | 434 | public static List convert(ExtendedMediaEntity[] items) { 435 | List result = new ArrayList<>(); 436 | if (null == items) { 437 | return result; 438 | } 439 | for (ExtendedMediaEntity item : items) { 440 | Struct struct = convertExtendedMediaEntity(item); 441 | result.add(struct); 442 | } 443 | return result; 444 | } 445 | 446 | 447 | static Struct convertHashtagEntity(HashtagEntity hashtagEntity) { 448 | return new Struct(SCHEMA_HASHTAG_ENTITY) 449 | .put("Text", hashtagEntity.getText()) 450 | .put("Start", hashtagEntity.getStart()) 451 | .put("End", hashtagEntity.getEnd()); 452 | } 453 | 454 | public static List convert(HashtagEntity[] items) { 455 | List result = new ArrayList<>(); 456 | if (null == items) { 457 | return result; 458 | } 459 | for (HashtagEntity item : items) { 460 | Struct struct = convertHashtagEntity(item); 461 | result.add(struct); 462 | } 463 | return result; 464 | } 465 | 466 | 467 | static Struct convertMediaEntity(MediaEntity mediaEntity) { 468 | return new Struct(SCHEMA_MEDIA_ENTITY) 469 | .put("Id", mediaEntity.getId()) 470 | .put("Type", mediaEntity.getType()) 471 | .put("MediaURL", mediaEntity.getMediaURL()) 472 | .put("Sizes", convertSizes(mediaEntity.getSizes())) 473 | .put("MediaURLHttps", mediaEntity.getMediaURLHttps()) 474 
| .put("VideoAspectRatioWidth", mediaEntity.getVideoAspectRatioWidth()) 475 | .put("VideoAspectRatioHeight", mediaEntity.getVideoAspectRatioHeight()) 476 | .put("VideoDurationMillis", mediaEntity.getVideoDurationMillis()) 477 | .put("VideoVariants", convert(mediaEntity.getVideoVariants())) 478 | .put("ExtAltText", mediaEntity.getExtAltText()) 479 | .put("URL", mediaEntity.getURL()) 480 | .put("Text", mediaEntity.getText()) 481 | .put("ExpandedURL", mediaEntity.getExpandedURL()) 482 | .put("Start", mediaEntity.getStart()) 483 | .put("End", mediaEntity.getEnd()) 484 | .put("DisplayURL", mediaEntity.getDisplayURL()); 485 | } 486 | 487 | public static List convert(MediaEntity[] items) { 488 | List result = new ArrayList<>(); 489 | if (null == items) { 490 | return result; 491 | } 492 | for (MediaEntity item : items) { 493 | Struct struct = convertMediaEntity(item); 494 | result.add(struct); 495 | } 496 | return result; 497 | } 498 | 499 | 500 | static Struct convertSymbolEntity(SymbolEntity symbolEntity) { 501 | return new Struct(SCHEMA_SYMBOL_ENTITY) 502 | .put("Start", symbolEntity.getStart()) 503 | .put("End", symbolEntity.getEnd()) 504 | .put("Text", symbolEntity.getText()); 505 | } 506 | 507 | public static List convert(SymbolEntity[] items) { 508 | List result = new ArrayList<>(); 509 | if (null == items) { 510 | return result; 511 | } 512 | for (SymbolEntity item : items) { 513 | Struct struct = convertSymbolEntity(item); 514 | result.add(struct); 515 | } 516 | return result; 517 | } 518 | 519 | 520 | static Struct convertURLEntity(URLEntity uRLEntity) { 521 | return new Struct(SCHEMA_URL_ENTITY) 522 | .put("URL", uRLEntity.getURL()) 523 | .put("Text", uRLEntity.getText()) 524 | .put("ExpandedURL", uRLEntity.getExpandedURL()) 525 | .put("Start", uRLEntity.getStart()) 526 | .put("End", uRLEntity.getEnd()) 527 | .put("DisplayURL", uRLEntity.getDisplayURL()); 528 | } 529 | 530 | public static List convert(URLEntity[] items) { 531 | List result = new ArrayList<>(); 
532 | if (null == items) { 533 | return result; 534 | } 535 | for (URLEntity item : items) { 536 | Struct struct = convertURLEntity(item); 537 | result.add(struct); 538 | } 539 | return result; 540 | } 541 | 542 | 543 | static Struct convertUserMentionEntity(UserMentionEntity userMentionEntity) { 544 | return new Struct(SCHEMA_USER_MENTION_ENTITY) 545 | .put("Name", userMentionEntity.getName()) 546 | .put("Id", userMentionEntity.getId()) 547 | .put("Text", userMentionEntity.getText()) 548 | .put("ScreenName", userMentionEntity.getScreenName()) 549 | .put("Start", userMentionEntity.getStart()) 550 | .put("End", userMentionEntity.getEnd()); 551 | } 552 | 553 | public static List convert(UserMentionEntity[] items) { 554 | List result = new ArrayList<>(); 555 | if (null == items) { 556 | return result; 557 | } 558 | for (UserMentionEntity item : items) { 559 | Struct struct = convertUserMentionEntity(item); 560 | result.add(struct); 561 | } 562 | return result; 563 | } 564 | 565 | 566 | public static void convertKey(Status status, Struct struct) { 567 | struct.put("Id", status.getId()); 568 | } 569 | 570 | public static void convert(Status status, Struct struct) { 571 | struct 572 | .put("CreatedAt", status.getCreatedAt()) 573 | .put("Id", status.getId()) 574 | .put("Text", status.getText()) 575 | .put("Source", status.getSource()) 576 | .put("Truncated", status.isTruncated()) 577 | .put("InReplyToStatusId", status.getInReplyToStatusId()) 578 | .put("InReplyToUserId", status.getInReplyToUserId()) 579 | .put("InReplyToScreenName", status.getInReplyToScreenName()) 580 | .put("Favorited", status.isFavorited()) 581 | .put("Retweeted", status.isRetweeted()) 582 | .put("FavoriteCount", status.getFavoriteCount()) 583 | .put("Retweet", status.isRetweet()) 584 | .put("RetweetCount", status.getRetweetCount()) 585 | .put("RetweetedByMe", status.isRetweetedByMe()) 586 | .put("CurrentUserRetweetId", status.getCurrentUserRetweetId()) 587 | .put("PossiblySensitive", 
status.isPossiblySensitive()) 588 | .put("Lang", status.getLang()); 589 | 590 | Struct userStruct; 591 | if (null != status.getUser()) { 592 | userStruct = new Struct(USER_SCHEMA); 593 | convert(status.getUser(), userStruct); 594 | } else { 595 | userStruct = null; 596 | } 597 | struct.put("User", userStruct); 598 | 599 | Struct placeStruct; 600 | if (null != status.getPlace()) { 601 | placeStruct = new Struct(PLACE_SCHEMA); 602 | convert(status.getPlace(), placeStruct); 603 | } else { 604 | placeStruct = null; 605 | } 606 | struct.put("Place", placeStruct); 607 | 608 | Struct geoLocationStruct; 609 | if (null != status.getGeoLocation()) { 610 | geoLocationStruct = new Struct(GEO_LOCATION_SCHEMA); 611 | convert(status.getGeoLocation(), geoLocationStruct); 612 | } else { 613 | geoLocationStruct = null; 614 | } 615 | struct.put("GeoLocation", geoLocationStruct); 616 | List contributers = new ArrayList<>(); 617 | 618 | if (null != status.getContributors()) { 619 | for (Long l : status.getContributors()) { 620 | contributers.add(l); 621 | } 622 | } 623 | struct.put("Contributors", contributers); 624 | 625 | List withheldInCountries = new ArrayList<>(); 626 | if (null != status.getWithheldInCountries()) { 627 | for (String s : status.getWithheldInCountries()) { 628 | withheldInCountries.add(s); 629 | } 630 | } 631 | struct.put("WithheldInCountries", withheldInCountries); 632 | 633 | struct.put("HashtagEntities", convert(status.getHashtagEntities())); 634 | struct.put("UserMentionEntities", convert(status.getUserMentionEntities())); 635 | struct.put("MediaEntities", convert(status.getMediaEntities())); 636 | struct.put("SymbolEntities", convert(status.getSymbolEntities())); 637 | struct.put("URLEntities", convert(status.getURLEntities())); 638 | } 639 | 640 | public static void convert(StatusDeletionNotice statusDeletionNotice, Struct struct) { 641 | struct.put("StatusId", statusDeletionNotice.getStatusId()); 642 | struct.put("UserId", statusDeletionNotice.getUserId()); 
643 | } 644 | 645 | public static void convertKey(StatusDeletionNotice statusDeletionNotice, Struct struct) { 646 | struct.put("StatusId", statusDeletionNotice.getStatusId()); 647 | } 648 | } 649 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.twitter; 17 | 18 | import com.github.jcustenborder.kafka.connect.utils.VersionUtil; 19 | import com.github.jcustenborder.kafka.connect.utils.config.Description; 20 | import com.github.jcustenborder.kafka.connect.utils.config.Title; 21 | import com.google.common.base.Joiner; 22 | import com.google.common.base.Preconditions; 23 | import com.google.common.collect.ArrayListMultimap; 24 | import com.google.common.collect.Multimap; 25 | import org.apache.kafka.common.config.ConfigDef; 26 | import org.apache.kafka.connect.connector.Task; 27 | import org.apache.kafka.connect.source.SourceConnector; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | 31 | import java.util.ArrayList; 32 | import java.util.Collection; 33 | import java.util.LinkedHashMap; 34 | import java.util.List; 35 | import java.util.Map; 36 | 37 | @Title("Twitter") 38 | @Description("This Twitter Source connector is used to pull data from Twitter in realtime.") 39 | public class TwitterSourceConnector extends SourceConnector { 40 | private static Logger log = LoggerFactory.getLogger(TwitterSourceConnector.class); 41 | Map settings; 42 | private TwitterSourceConnectorConfig config; 43 | 44 | @Override 45 | public String version() { 46 | return VersionUtil.version(this.getClass()); 47 | } 48 | 49 | @Override 50 | public void start(Map map) { 51 | this.config = new TwitterSourceConnectorConfig(map); 52 | this.settings = map; 53 | } 54 | 55 | @Override 56 | public Class taskClass() { 57 | return TwitterSourceTask.class; 58 | } 59 | 60 | @Override 61 | public List> taskConfigs(int maxTasks) { 62 | Preconditions.checkState(maxTasks > 0, "MaxTasks must be greater than 0"); 63 | final int tasks = Math.min(maxTasks, this.config.filterKeywords.size()); 64 | 65 | 66 | Multimap taskToKeywords = ArrayListMultimap.create(); 67 | int index = 0; 68 | for (String keyword : this.config.filterKeywords) { 69 | final int taskID = index % tasks; 70 
| taskToKeywords.put(taskID, keyword); 71 | index++; 72 | } 73 | final List> taskConfigs = new ArrayList<>(tasks); 74 | 75 | for (Integer taskID : taskToKeywords.keySet()) { 76 | Collection keywords = taskToKeywords.get(taskID); 77 | Map taskSettings = new LinkedHashMap<>(this.settings); 78 | taskSettings.put(TwitterSourceConnectorConfig.FILTER_KEYWORDS_CONF, Joiner.on(',').join(keywords)); 79 | taskConfigs.add(taskSettings); 80 | } 81 | 82 | return taskConfigs; 83 | } 84 | 85 | @Override 86 | public void stop() { 87 | 88 | } 89 | 90 | @Override 91 | public ConfigDef config() { 92 | return TwitterSourceConnectorConfig.conf(); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnectorConfig.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | *

4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *

8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *

10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.jcustenborder.kafka.connect.twitter; 17 | 18 | import com.github.jcustenborder.kafka.connect.utils.config.ConfigKeyBuilder; 19 | import com.github.jcustenborder.kafka.connect.utils.config.ConfigUtils; 20 | import com.google.common.primitives.Longs; 21 | import org.apache.kafka.common.config.AbstractConfig; 22 | import org.apache.kafka.common.config.ConfigDef; 23 | import org.apache.kafka.common.config.ConfigDef.Importance; 24 | import org.apache.kafka.common.config.ConfigDef.Type; 25 | import org.apache.kafka.common.config.ConfigException; 26 | import twitter4j.conf.Configuration; 27 | import twitter4j.conf.PropertyConfiguration; 28 | 29 | import java.util.Collections; 30 | import java.util.List; 31 | import java.util.Map; 32 | import java.util.Properties; 33 | import java.util.Set; 34 | import java.util.stream.Collectors; 35 | 36 | 37 | public class TwitterSourceConnectorConfig extends AbstractConfig { 38 | 39 | public static final String TWITTER_DEBUG_CONF = "twitter.debug"; 40 | public static final String TWITTER_OAUTH_CONSUMER_KEY_CONF = "twitter.oauth.consumerKey"; 41 | public static final String TWITTER_OAUTH_SECRET_KEY_CONF = "twitter.oauth.consumerSecret"; 42 | public static final String TWITTER_OAUTH_ACCESS_TOKEN_CONF = "twitter.oauth.accessToken"; 43 | public static final String TWITTER_OAUTH_ACCESS_TOKEN_SECRET_CONF = "twitter.oauth.accessTokenSecret"; 44 | public static final String FILTER_KEYWORDS_CONF = "filter.keywords"; 45 | public static final String FILTER_USER_IDS_CONF = "filter.userIds"; 46 | public static final String KAFKA_STATUS_TOPIC_CONF = "kafka.status.topic"; 47 | public 
static final String KAFKA_STATUS_TOPIC_DOC = "Kafka topic to write the statuses to."; 48 | public static final String PROCESS_DELETES_CONF = "process.deletes"; 49 | public static final String PROCESS_DELETES_DOC = "Should this connector process deletes."; 50 | public static final String QUEUE_EMPTY_MS_CONF = "queue.empty.ms"; 51 | public static final String QUEUE_BATCH_SIZE_CONF = "queue.batch.size"; 52 | private static final String TWITTER_DEBUG_DOC = "Flag to enable debug logging for the twitter api."; 53 | private static final String TWITTER_OAUTH_CONSUMER_KEY_DOC = "OAuth consumer key"; 54 | private static final String TWITTER_OAUTH_SECRET_KEY_DOC = "OAuth consumer secret"; 55 | private static final String TWITTER_OAUTH_ACCESS_TOKEN_DOC = "OAuth access token"; 56 | private static final String TWITTER_OAUTH_ACCESS_TOKEN_SECRET_DOC = "OAuth access token secret"; 57 | private static final String FILTER_KEYWORDS_DOC = "Twitter keywords to filter for."; 58 | private static final String FILTER_USER_IDS_DOC = "Twitter user IDs to follow."; 59 | public static final String QUEUE_EMPTY_MS_DOC = "The amount of time to wait if there are no records in the queue."; 60 | public static final String QUEUE_BATCH_SIZE_DOC = "The number of records to return in a single batch."; 61 | 62 | 63 | public final String topic; 64 | public final boolean twitterDebug; 65 | public final boolean processDeletes; 66 | public final Set filterKeywords; 67 | public final Set filterUserIds; 68 | public final int queueEmptyMs; 69 | public final int queueBatchSize; 70 | 71 | 72 | public TwitterSourceConnectorConfig(Map parsedConfig) { 73 | super(conf(), parsedConfig); 74 | this.topic = this.getString(KAFKA_STATUS_TOPIC_CONF); 75 | this.twitterDebug = this.getBoolean(TWITTER_DEBUG_CONF); 76 | this.processDeletes = this.getBoolean(PROCESS_DELETES_CONF); 77 | this.filterKeywords = ConfigUtils.getSet(this, FILTER_KEYWORDS_CONF); 78 | this.filterUserIds = ConfigUtils.getSet(this, FILTER_USER_IDS_CONF) 79 
| .stream() 80 | .map(Long::parseLong) 81 | .collect(Collectors.toSet()); 82 | this.queueBatchSize = getInt(QUEUE_BATCH_SIZE_CONF); 83 | this.queueEmptyMs = getInt(QUEUE_EMPTY_MS_CONF); 84 | } 85 | 86 | static class UserIdValidator implements ConfigDef.Validator { 87 | @Override 88 | public void ensureValid(String key, Object o) { 89 | if (o instanceof List) { 90 | List userIds = (List) o; 91 | for (String userId : userIds) { 92 | if (null == Longs.tryParse(userId)) { 93 | throw new ConfigException(key, userId, "Could not parse to long."); 94 | } 95 | } 96 | } 97 | } 98 | } 99 | 100 | static final ConfigDef.Validator USERID_VALIDATOR = new UserIdValidator(); 101 | 102 | public static ConfigDef conf() { 103 | return new ConfigDef() 104 | .define(TWITTER_DEBUG_CONF, Type.BOOLEAN, false, Importance.LOW, TWITTER_DEBUG_DOC) 105 | .define(TWITTER_OAUTH_CONSUMER_KEY_CONF, Type.PASSWORD, Importance.HIGH, TWITTER_OAUTH_CONSUMER_KEY_DOC) 106 | .define(TWITTER_OAUTH_SECRET_KEY_CONF, Type.PASSWORD, Importance.HIGH, TWITTER_OAUTH_SECRET_KEY_DOC) 107 | .define(TWITTER_OAUTH_ACCESS_TOKEN_CONF, Type.PASSWORD, Importance.HIGH, TWITTER_OAUTH_ACCESS_TOKEN_DOC) 108 | .define(TWITTER_OAUTH_ACCESS_TOKEN_SECRET_CONF, Type.PASSWORD, Importance.HIGH, TWITTER_OAUTH_ACCESS_TOKEN_SECRET_DOC) 109 | .define(FILTER_KEYWORDS_CONF, Type.LIST, Importance.HIGH, FILTER_KEYWORDS_DOC) 110 | .define( 111 | ConfigKeyBuilder.of(FILTER_USER_IDS_CONF, Type.LIST) 112 | .importance(Importance.HIGH) 113 | .documentation(FILTER_USER_IDS_DOC) 114 | .defaultValue(Collections.emptyList()) 115 | .validator(USERID_VALIDATOR) 116 | .build() 117 | ) 118 | .define(KAFKA_STATUS_TOPIC_CONF, Type.STRING, Importance.HIGH, KAFKA_STATUS_TOPIC_DOC) 119 | .define(PROCESS_DELETES_CONF, Type.BOOLEAN, Importance.HIGH, PROCESS_DELETES_DOC) 120 | .define( 121 | ConfigKeyBuilder.of(QUEUE_EMPTY_MS_CONF, Type.INT) 122 | .importance(Importance.LOW) 123 | .documentation(QUEUE_EMPTY_MS_DOC) 124 | .defaultValue(100) 125 | 
.validator(ConfigDef.Range.atLeast(10)) 126 | .build() 127 | ) 128 | .define( 129 | ConfigKeyBuilder.of(QUEUE_BATCH_SIZE_CONF, Type.INT) 130 | .importance(Importance.LOW) 131 | .documentation(QUEUE_BATCH_SIZE_DOC) 132 | .defaultValue(100) 133 | .validator(ConfigDef.Range.atLeast(1)) 134 | .build() 135 | ); 136 | } 137 | 138 | 139 | public Configuration configuration() { 140 | Properties properties = new Properties(); 141 | /* 142 | Grab all of the key/values that have a key that starts with twitter. This will strip 'twitter.' from beginning of 143 | each key. This aligns with what the twitter4j framework is expecting. 144 | */ 145 | properties.putAll(this.originalsWithPrefix("twitter.")); 146 | return new PropertyConfiguration(properties); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceTask.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.twitter; 17 | 18 | import com.github.jcustenborder.kafka.connect.utils.VersionUtil; 19 | import com.github.jcustenborder.kafka.connect.utils.data.SourceRecordDeque; 20 | import com.github.jcustenborder.kafka.connect.utils.data.SourceRecordDequeBuilder; 21 | import com.google.common.base.Joiner; 22 | import com.google.common.collect.ImmutableMap; 23 | import org.apache.kafka.connect.data.Struct; 24 | import org.apache.kafka.connect.source.SourceRecord; 25 | import org.apache.kafka.connect.source.SourceTask; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | import twitter4j.FilterQuery; 29 | import twitter4j.StallWarning; 30 | import twitter4j.Status; 31 | import twitter4j.StatusDeletionNotice; 32 | import twitter4j.StatusListener; 33 | import twitter4j.TwitterStream; 34 | import twitter4j.TwitterStreamFactory; 35 | 36 | import java.util.List; 37 | import java.util.Map; 38 | 39 | public class TwitterSourceTask extends SourceTask implements StatusListener { 40 | static final Logger log = LoggerFactory.getLogger(TwitterSourceTask.class); 41 | SourceRecordDeque messageQueue; 42 | 43 | TwitterStream twitterStream; 44 | TwitterSourceConnectorConfig config; 45 | 46 | @Override 47 | public String version() { 48 | return VersionUtil.version(this.getClass()); 49 | } 50 | 51 | @Override 52 | public void start(Map map) { 53 | this.config = new TwitterSourceConnectorConfig(map); 54 | this.messageQueue = SourceRecordDequeBuilder.of() 55 | .emptyWaitMs(this.config.queueEmptyMs) 56 | .batchSize(this.config.queueBatchSize) 57 | .build(); 58 | 59 | TwitterStreamFactory twitterStreamFactory = new TwitterStreamFactory(this.config.configuration()); 60 | this.twitterStream = twitterStreamFactory.getInstance(); 61 | String[] keywords = this.config.filterKeywords.toArray(new String[0]); 62 | if (log.isInfoEnabled()) { 63 | log.info("Setting up filters. 
Keywords = {}", Joiner.on(", ").join(keywords)); 64 | } 65 | 66 | FilterQuery filterQuery = new FilterQuery(); 67 | filterQuery.track(keywords); 68 | if (!this.config.filterUserIds.isEmpty()) { 69 | long[] userIds = this.config.filterUserIds.stream().mapToLong(Long::valueOf).toArray(); 70 | log.info("Setting up filters. userIds = {}", Joiner.on(", ").join(this.config.filterUserIds)); 71 | filterQuery.follow(userIds); 72 | } 73 | 74 | if (log.isInfoEnabled()) { 75 | log.info("Starting the twitter stream."); 76 | } 77 | twitterStream.addListener(this); 78 | twitterStream.filter(filterQuery); 79 | } 80 | 81 | @Override 82 | public List poll() throws InterruptedException { 83 | return this.messageQueue.getBatch(); 84 | } 85 | 86 | @Override 87 | public void stop() { 88 | if (log.isInfoEnabled()) { 89 | log.info("Shutting down twitter stream."); 90 | } 91 | twitterStream.shutdown(); 92 | } 93 | 94 | @Override 95 | public void onStatus(Status status) { 96 | try { 97 | Struct keyStruct = new Struct(StatusConverter.STATUS_SCHEMA_KEY); 98 | Struct valueStruct = new Struct(StatusConverter.STATUS_SCHEMA); 99 | 100 | StatusConverter.convertKey(status, keyStruct); 101 | StatusConverter.convert(status, valueStruct); 102 | 103 | Map sourcePartition = ImmutableMap.of(); 104 | Map sourceOffset = ImmutableMap.of(); 105 | 106 | SourceRecord record = new SourceRecord(sourcePartition, sourceOffset, this.config.topic, StatusConverter.STATUS_SCHEMA_KEY, keyStruct, StatusConverter.STATUS_SCHEMA, valueStruct); 107 | this.messageQueue.add(record); 108 | } catch (Exception ex) { 109 | if (log.isErrorEnabled()) { 110 | log.error("Exception thrown", ex); 111 | } 112 | } 113 | } 114 | 115 | @Override 116 | public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { 117 | if (!this.config.processDeletes) { 118 | return; 119 | } 120 | 121 | try { 122 | Struct keyStruct = new Struct(StatusConverter.SCHEMA_STATUS_DELETION_NOTICE_KEY); 123 | 124 | 
StatusConverter.convertKey(statusDeletionNotice, keyStruct); 125 | 126 | Map sourcePartition = ImmutableMap.of(); 127 | Map sourceOffset = ImmutableMap.of(); 128 | 129 | SourceRecord record = new SourceRecord(sourcePartition, sourceOffset, this.config.topic, StatusConverter.SCHEMA_STATUS_DELETION_NOTICE_KEY, keyStruct, null, null); 130 | this.messageQueue.add(record); 131 | } catch (Exception ex) { 132 | if (log.isErrorEnabled()) { 133 | log.error("Exception thrown", ex); 134 | } 135 | } 136 | } 137 | 138 | @Override 139 | public void onTrackLimitationNotice(int i) { 140 | 141 | } 142 | 143 | @Override 144 | public void onScrubGeo(long l, long l1) { 145 | 146 | } 147 | 148 | @Override 149 | public void onStallWarning(StallWarning stallWarning) { 150 | if (log.isWarnEnabled()) { 151 | log.warn("code = '{}' percentFull = '{}' - {}", stallWarning.getCode(), stallWarning.getPercentFull(), stallWarning.getMessage()); 152 | } 153 | } 154 | 155 | @Override 156 | public void onException(Exception e) { 157 | if (log.isErrorEnabled()) { 158 | log.error("onException", e); 159 | } 160 | } 161 | } -------------------------------------------------------------------------------- /src/main/java/com/github/jcustenborder/kafka/connect/twitter/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | @Title("Twitter") 17 | @Introduction("The Twitter plugin is used to pull data from Twitter and write it to Kafka.") 18 | @PluginOwner("jcustenborder") 19 | @PluginName("kafka-connect-twitter") 20 | package com.github.jcustenborder.kafka.connect.twitter; 21 | 22 | import com.github.jcustenborder.kafka.connect.utils.config.Introduction; 23 | import com.github.jcustenborder.kafka.connect.utils.config.PluginName; 24 | import com.github.jcustenborder.kafka.connect.utils.config.PluginOwner; 25 | import com.github.jcustenborder.kafka.connect.utils.config.Title; -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/twitter/DocumentationTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.twitter; 2 | 3 | import com.github.jcustenborder.kafka.connect.utils.BaseDocumentationTest; 4 | import org.apache.kafka.connect.data.Schema; 5 | 6 | import java.lang.reflect.Field; 7 | import java.lang.reflect.Modifier; 8 | import java.util.Arrays; 9 | import java.util.List; 10 | import java.util.stream.Collectors; 11 | 12 | public class DocumentationTest extends BaseDocumentationTest { 13 | static Schema schema(Field field) { 14 | try { 15 | return (Schema) field.get(null); 16 | } catch (IllegalAccessException e) { 17 | throw new IllegalStateException(e); 18 | } 19 | } 20 | 21 | @Override 22 | protected List schemas() { 23 | List schemas = Arrays.stream(StatusConverter.class.getFields()) 24 | .filter(field -> Modifier.isFinal(field.getModifiers())) 25 | .filter(field -> Modifier.isStatic(field.getModifiers())) 26 | .filter(field -> Schema.class.equals(field.getType())) 27 | .map(DocumentationTest::schema) 28 | .collect(Collectors.toList()); 29 | return schemas; 30 | } 31 | } 32 | 
-------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/twitter/SchemaGeneratorTest.java: -------------------------------------------------------------------------------- 1 | package com.github.jcustenborder.kafka.connect.twitter; 2 | 3 | import com.google.common.base.CaseFormat; 4 | import org.junit.jupiter.api.Test; 5 | import org.reflections.Reflections; 6 | import org.reflections.util.ClasspathHelper; 7 | import org.reflections.util.ConfigurationBuilder; 8 | import twitter4j.MediaEntity; 9 | import twitter4j.TweetEntity; 10 | 11 | import java.lang.reflect.Method; 12 | import java.lang.reflect.Modifier; 13 | import java.util.ArrayList; 14 | import java.util.Comparator; 15 | import java.util.HashSet; 16 | import java.util.List; 17 | import java.util.Map; 18 | import java.util.Set; 19 | import java.util.stream.Collectors; 20 | 21 | public class SchemaGeneratorTest { 22 | 23 | List> list(Reflections reflections, Class cls) { 24 | List> classes = reflections.getSubTypesOf(cls) 25 | .stream() 26 | .filter(aClass -> Modifier.isInterface(aClass.getModifiers())) 27 | .collect(Collectors.toList()); 28 | classes.sort(Comparator.comparing(Class::getName)); 29 | return classes; 30 | } 31 | 32 | String schema(Class cls) { 33 | String result; 34 | 35 | if (String.class.equals(cls)) { 36 | result = "SchemaBuilder.string().optional().doc(\"\").build()"; 37 | } else if (int.class.equals(cls)) { 38 | result = "SchemaBuilder.int32().optional().doc(\"\").build()"; 39 | } else if (long.class.equals(cls)) { 40 | result = "SchemaBuilder.int64().optional().doc(\"\").build()"; 41 | } else if (cls.isArray()) { 42 | String childSchema = schema(cls.getComponentType()); 43 | result = String.format("SchemaBuilder.array(%s).optional().doc(\"\").build()", childSchema); 44 | } else if (Map.class.isAssignableFrom(cls)) { 45 | result = "SchemaBuilder.map(Schema.STRING_SCHEMA, SCHEMA_MEDIA_ENTITY_SIZE)"; 46 | 47 | } 
else { 48 | result = "SCHEMA_" + CaseFormat.UPPER_CAMEL.to(CaseFormat.UPPER_UNDERSCORE, cls.getSimpleName()).replace('$', '_'); 49 | } 50 | 51 | 52 | return result; 53 | } 54 | 55 | void processClass(Class cls, StringBuilder builder) { 56 | 57 | final String schemaConstantName; 58 | final String schemaName; 59 | final String typeName; 60 | final String convertMethodName; 61 | 62 | if (null == cls.getDeclaringClass()) { 63 | schemaConstantName = "SCHEMA_" + CaseFormat.UPPER_CAMEL.to(CaseFormat.UPPER_UNDERSCORE, cls.getSimpleName()); 64 | schemaName = String.format("com.github.jcustenborder.kafka.connect.twitter.%s", cls.getSimpleName()); 65 | typeName = cls.getSimpleName(); 66 | convertMethodName = String.format("convert%s", cls.getSimpleName()); 67 | } else { 68 | schemaConstantName = "SCHEMA_" + CaseFormat.UPPER_CAMEL.to(CaseFormat.UPPER_UNDERSCORE, cls.getDeclaringClass().getSimpleName() + cls.getSimpleName()); 69 | typeName = String.format("%s.%s", cls.getDeclaringClass().getSimpleName(), cls.getSimpleName()); 70 | /* Nested types are named Declaring.Nested, the same order typeName, schemaConstantName and convertMethodName use. */ schemaName = String.format("com.github.jcustenborder.kafka.connect.twitter.%s.%s", cls.getDeclaringClass().getSimpleName(), cls.getSimpleName()); 71 | convertMethodName = String.format("convert%s%s", cls.getDeclaringClass().getSimpleName(), cls.getSimpleName()); 72 | } 73 | 74 | 75 | builder.append(String.format("public static final Schema %s =SchemaBuilder.struct()\n", schemaConstantName)); 76 | builder.append(String.format(" .name(\"%s\")\n", schemaName)); 77 | builder.append(" .doc(\"\")\n"); 78 | 79 | Set methods = new HashSet<>(); 80 | for (Method method : cls.getMethods()) { 81 | String methodName = method.getName().replace("get", ""); 82 | if (!methods.add(methodName)) { 83 | continue; 84 | } 85 | String expectedSchema = schema(method.getReturnType()); 86 | builder.append(String.format(" .field(\"%s\", %s)\n", methodName, expectedSchema)); 87 | } 88 | builder.append(" .build();\n\n"); 89 | 90 | methods.clear(); 91 | String variableName =
CaseFormat.UPPER_CAMEL.to(CaseFormat.LOWER_CAMEL, cls.getSimpleName()); 92 | builder.append(String.format("static Struct %s(%s %s) {\n", convertMethodName, typeName, variableName)); 93 | builder.append(String.format(" return new Struct(%s)", schemaConstantName)); 94 | for (Method method : cls.getMethods()) { 95 | String methodName = method.getName().replace("get", ""); 96 | if (!methods.add(methodName)) { 97 | continue; 98 | } 99 | builder.append(String.format("\n .put(\"%s\", %s.%s())", methodName, variableName, method.getName())); 100 | } 101 | builder.append(";\n }\n"); 102 | 103 | builder.append("\n"); 104 | builder.append(String.format("public static List convert(%s[] items) {\n", typeName)); 105 | builder.append(" List result = new ArrayList<>();\n"); 106 | builder.append(" if(null==items) {\n"); 107 | builder.append(" return result;\n"); 108 | builder.append(" }\n"); 109 | builder.append(String.format(" for(%s item: items) {\n", typeName)); 110 | builder.append(String.format(" Struct struct = %s(item);\n", convertMethodName)); 111 | builder.append(" result.add(struct);\n"); 112 | builder.append(" }\n"); 113 | builder.append(" return result;\n"); 114 | builder.append("}\n"); 115 | 116 | // } 117 | // public static List convert(UserMentionEntity[] userMentionEntities) { 118 | // List result = new ArrayList<>(); 119 | // if(null==userMentionEntities) { 120 | // return result; 121 | // } 122 | // for(UserMentionEntity item: userMentionEntities) { 123 | // Struct struct = convertUserMentionEntity(item); 124 | // result.add(struct); 125 | // } 126 | // return result; 127 | // } 128 | 129 | 130 | } 131 | 132 | @Test 133 | public void tweetEntities() { 134 | Reflections reflections = new Reflections(new ConfigurationBuilder() 135 | .setUrls(ClasspathHelper.forJavaClassPath()) 136 | .forPackages(TweetEntity.class.getPackage().getName()) 137 | ); 138 | 139 | List> allClasses = new ArrayList<>(); 140 | List> classes = list(reflections, TweetEntity.class); 141 | 
allClasses.add(MediaEntity.Variant.class); 142 | allClasses.add(MediaEntity.Size.class); 143 | allClasses.addAll(classes); 144 | 145 | 146 | for (Class cls : allClasses) { 147 | StringBuilder builder = new StringBuilder(); 148 | processClass(cls, builder); 149 | 150 | System.out.println(builder); 151 | } 152 | 153 | 154 | } 155 | 156 | 157 | } 158 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/twitter/StatusConverterTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.twitter; 17 | 18 | import org.apache.kafka.connect.data.Struct; 19 | import org.junit.jupiter.api.Test; 20 | import twitter4j.GeoLocation; 21 | import twitter4j.Place; 22 | import twitter4j.Status; 23 | import twitter4j.StatusDeletionNotice; 24 | import twitter4j.User; 25 | 26 | import java.util.ArrayList; 27 | import java.util.Date; 28 | import java.util.List; 29 | 30 | import static org.junit.jupiter.api.Assertions.assertEquals; 31 | import static org.junit.jupiter.api.Assertions.assertNotNull; 32 | import static org.mockito.Mockito.mock; 33 | import static org.mockito.Mockito.when; 34 | 35 | public class StatusConverterTest { 36 | 37 | public static GeoLocation mockGeoLocation() { 38 | return new GeoLocation(30.2672D, 97.7431D); 39 | } 40 | 41 | public static Place mockPlace() { 42 | Place place = mock(Place.class); 43 | when(place.getName()).thenReturn("Example place"); 44 | when(place.getStreetAddress()).thenReturn("123 Example St"); 45 | when(place.getCountryCode()).thenReturn("US"); 46 | when(place.getId()).thenReturn("asdfaisdfasd"); 47 | when(place.getCountry()).thenReturn("United States"); 48 | when(place.getPlaceType()).thenReturn("ADF"); 49 | when(place.getURL()).thenReturn("http://www.example.com/"); 50 | when(place.getFullName()).thenReturn("Example place"); 51 | return place; 52 | } 53 | 54 | public static Status mockStatus() { 55 | Status status = mock(Status.class); 56 | User user = mockUser(); 57 | GeoLocation geoLocation = mockGeoLocation(); 58 | Place place = mockPlace(); 59 | 60 | when(status.getCreatedAt()).thenReturn(new Date(1471667709998L)); 61 | when(status.getId()).thenReturn(9823452L); 62 | when(status.getText()).thenReturn("This is a twit"); 63 | when(status.getSource()).thenReturn("foo"); 64 | when(status.isTruncated()).thenReturn(false); 65 | when(status.getInReplyToStatusId()).thenReturn(2345234L); 66 | when(status.getInReplyToUserId()).thenReturn(8756786L); 67 | 
when(status.getInReplyToScreenName()).thenReturn("foo"); 68 | when(status.getGeoLocation()).thenReturn(geoLocation); 69 | when(status.getPlace()).thenReturn(place); 70 | when(status.isFavorited()).thenReturn(true); 71 | when(status.isRetweeted()).thenReturn(false); 72 | when(status.getFavoriteCount()).thenReturn(1234); 73 | when(status.getUser()).thenReturn(user); 74 | when(status.isRetweet()).thenReturn(false); 75 | when(status.getContributors()).thenReturn(new long[]{431234L, 986789678L}); 76 | when(status.getRetweetCount()).thenReturn(1234); 77 | when(status.isRetweetedByMe()).thenReturn(false); 78 | when(status.getCurrentUserRetweetId()).thenReturn(653456345L); 79 | when(status.isPossiblySensitive()).thenReturn(false); 80 | when(status.getLang()).thenReturn("en-US"); 81 | when(status.getWithheldInCountries()).thenReturn(new String[]{"CN"}); 82 | 83 | return status; 84 | } 85 | 86 | public static User mockUser() { 87 | User user = mock(User.class); 88 | 89 | when(user.getId()).thenReturn(1234L); 90 | when(user.getName()).thenReturn("Example User"); 91 | when(user.getScreenName()).thenReturn("example"); 92 | when(user.getLocation()).thenReturn("Austin, TX"); 93 | when(user.getDescription()).thenReturn("This is a description"); 94 | when(user.isContributorsEnabled()).thenReturn(true); 95 | when(user.getProfileImageURL()).thenReturn("http://i.twittercdn.com/profile.jpg"); 96 | when(user.getBiggerProfileImageURL()).thenReturn("http://i.twittercdn.com/biggerprofile.jpg"); 97 | when(user.getMiniProfileImageURL()).thenReturn("http://i.twittercdn.com/mini.profile.jpg"); 98 | when(user.getOriginalProfileImageURL()).thenReturn("http://i.twittercdn.com/original.profile.jpg"); 99 | when(user.getProfileImageURLHttps()).thenReturn("https://i.twittercdn.com/profile.jpg"); 100 | when(user.getBiggerProfileImageURLHttps()).thenReturn("https://i.twittercdn.com/bigger.profile.jpg"); 101 | 
when(user.getMiniProfileImageURLHttps()).thenReturn("https://i.twittercdn.com/mini.profile.jpg"); 102 | when(user.getOriginalProfileImageURLHttps()).thenReturn("https://i.twittercdn.com/original.profile.jpg"); 103 | when(user.isDefaultProfileImage()).thenReturn(true); 104 | when(user.getURL()).thenReturn("https://www.twitter.com/example"); 105 | when(user.isProtected()).thenReturn(false); 106 | when(user.getFollowersCount()).thenReturn(54245); 107 | when(user.getProfileBackgroundColor()).thenReturn("#ffffff"); 108 | when(user.getProfileTextColor()).thenReturn("#000000"); 109 | when(user.getProfileLinkColor()).thenReturn("#aaaaaa"); 110 | when(user.getProfileSidebarFillColor()).thenReturn("#333333"); 111 | when(user.getProfileSidebarBorderColor()).thenReturn("#555555"); 112 | when(user.isProfileUseBackgroundImage()).thenReturn(true); 113 | when(user.isDefaultProfile()).thenReturn(true); 114 | when(user.isShowAllInlineMedia()).thenReturn(true); 115 | when(user.getFriendsCount()).thenReturn(452345234); 116 | when(user.getCreatedAt()).thenReturn(new Date(1471665653209L)); 117 | when(user.getFavouritesCount()).thenReturn(12341); 118 | when(user.getUtcOffset()).thenReturn(8); 119 | when(user.getTimeZone()).thenReturn("UTC"); 120 | when(user.getProfileBackgroundImageURL()).thenReturn("https://i.twittercdn.com/original.background.jpg"); 121 | when(user.getProfileBackgroundImageUrlHttps()).thenReturn("https://i.twittercdn.com/original.background.jpg"); 122 | when(user.getProfileBannerURL()).thenReturn("https://i.twittercdn.com/original.banner.jpg"); 123 | when(user.getProfileBannerRetinaURL()).thenReturn("https://i.twittercdn.com/original.banner.jpg"); 124 | when(user.getProfileBannerIPadURL()).thenReturn("https://i.twittercdn.com/original.banner.jpg"); 125 | when(user.getProfileBannerIPadRetinaURL()).thenReturn("https://i.twittercdn.com/original.banner.jpg"); 126 | when(user.getProfileBannerMobileURL()).thenReturn("https://i.twittercdn.com/original.banner.jpg"); 127 | 
when(user.getProfileBannerMobileRetinaURL()).thenReturn("https://i.twittercdn.com/original.banner.jpg"); 128 | when(user.isProfileBackgroundTiled()).thenReturn(false); 129 | when(user.getLang()).thenReturn("en-us"); 130 | when(user.getStatusesCount()).thenReturn(543); 131 | when(user.isGeoEnabled()).thenReturn(true); 132 | when(user.isVerified()).thenReturn(true); 133 | when(user.isTranslator()).thenReturn(false); 134 | when(user.getListedCount()).thenReturn(4); 135 | when(user.isFollowRequestSent()).thenReturn(false); 136 | when(user.getWithheldInCountries()).thenReturn(new String[]{"CN"}); 137 | 138 | 139 | return user; 140 | } 141 | 142 | public static StatusDeletionNotice mockStatusDeletionNotice() { 143 | StatusDeletionNotice statusDeletionNotice = mock(StatusDeletionNotice.class); 144 | when(statusDeletionNotice.getStatusId()).thenReturn(1234565345L); 145 | when(statusDeletionNotice.getUserId()).thenReturn(6543456354L); 146 | return statusDeletionNotice; 147 | } 148 | 149 | List convert(long[] values) { 150 | List list = new ArrayList<>(); 151 | for (Long l : values) { 152 | list.add(l); 153 | } 154 | return list; 155 | } 156 | 157 | List convert(String[] values) { 158 | List list = new ArrayList<>(); 159 | for (String l : values) { 160 | list.add(l); 161 | } 162 | return list; 163 | } 164 | 165 | void assertStatus(Status status, Struct struct) { 166 | assertEquals(status.getCreatedAt(), struct.get("CreatedAt"), "CreatedAt does not match."); 167 | assertEquals(status.getId(), struct.get("Id"), "Id does not match."); 168 | assertEquals(status.getText(), struct.get("Text"), "Text does not match."); 169 | assertEquals(status.getSource(), struct.get("Source"), "Source does not match."); 170 | assertEquals(status.isTruncated(), struct.get("Truncated"), "Truncated does not match."); 171 | assertEquals(status.getInReplyToStatusId(), struct.get("InReplyToStatusId"), "InReplyToStatusId does not match."); 172 | assertEquals(status.getInReplyToUserId(), 
struct.get("InReplyToUserId"), "InReplyToUserId does not match."); 173 | assertEquals(status.getInReplyToScreenName(), struct.get("InReplyToScreenName"), "InReplyToScreenName does not match."); 174 | assertEquals(status.isFavorited(), struct.get("Favorited"), "Favorited does not match."); 175 | assertEquals(status.isRetweeted(), struct.get("Retweeted"), "Retweeted does not match."); 176 | assertEquals(status.getFavoriteCount(), struct.get("FavoriteCount"), "FavoriteCount does not match."); 177 | assertEquals(status.isRetweet(), struct.get("Retweet"), "Retweet does not match."); 178 | assertEquals(status.getRetweetCount(), struct.get("RetweetCount"), "RetweetCount does not match."); 179 | assertEquals(status.isRetweetedByMe(), struct.get("RetweetedByMe"), "RetweetedByMe does not match."); 180 | assertEquals(status.getCurrentUserRetweetId(), struct.get("CurrentUserRetweetId"), "CurrentUserRetweetId does not match."); 181 | assertEquals(status.isPossiblySensitive(), struct.get("PossiblySensitive"), "PossiblySensitive does not match."); 182 | assertEquals(status.getLang(), struct.get("Lang"), "Lang does not match."); 183 | 184 | assertUser(status.getUser(), struct.getStruct("User")); 185 | assertPlace(status.getPlace(), struct.getStruct("Place")); 186 | assertGeoLocation(status.getGeoLocation(), struct.getStruct("GeoLocation")); 187 | 188 | assertEquals(convert(status.getContributors()), struct.getArray("Contributors"), "Contributors does not match."); 189 | assertEquals(convert(status.getWithheldInCountries()), struct.get("WithheldInCountries"), "WithheldInCountries does not match."); 190 | } 191 | 192 | /* Checks that the struct carries the same coordinates as the source GeoLocation. Arguments are (expected, actual, delta, message); the delta only absorbs floating-point noise. */ void assertGeoLocation(GeoLocation geoLocation, Struct struct) { 193 | assertEquals(geoLocation.getLatitude(), struct.getFloat64("Latitude"), 1.0E-9, "Latitude does not match."); 194 | assertEquals(geoLocation.getLongitude(), struct.getFloat64("Longitude"), 1.0E-9, "Longitude does not match."); 195 | } 196 | 197 | void assertPlace(Place place, Struct struct) { 198 | assertEquals(place.getName(), struct.get("Name"), "Name does not
match."); 199 | assertEquals(place.getStreetAddress(), struct.get("StreetAddress"), "StreetAddress does not match."); 200 | assertEquals(place.getCountryCode(), struct.get("CountryCode"), "CountryCode does not match."); 201 | assertEquals(place.getId(), struct.get("Id"), "Id does not match."); 202 | assertEquals(place.getCountry(), struct.get("Country"), "Country does not match."); 203 | assertEquals(place.getPlaceType(), struct.get("PlaceType"), "PlaceType does not match."); 204 | assertEquals(place.getURL(), struct.get("URL"), "URL does not match."); 205 | assertEquals(place.getFullName(), struct.get("FullName"), "FullName does not match."); 206 | } 207 | 208 | void assertUser(User user, Struct struct) { 209 | assertNotNull(struct, "struct should not be null."); 210 | assertEquals(user.getId(), struct.get("Id"), "Id does not match."); 211 | assertEquals(user.getName(), struct.get("Name"), "Name does not match."); 212 | assertEquals(user.getScreenName(), struct.get("ScreenName"), "ScreenName does not match."); 213 | assertEquals(user.getLocation(), struct.get("Location"), "Location does not match."); 214 | assertEquals(user.getDescription(), struct.get("Description"), "Description does not match."); 215 | assertEquals(user.isContributorsEnabled(), struct.get("ContributorsEnabled"), "ContributorsEnabled does not match."); 216 | assertEquals(user.getProfileImageURL(), struct.get("ProfileImageURL"), "ProfileImageURL does not match."); 217 | assertEquals(user.getBiggerProfileImageURL(), struct.get("BiggerProfileImageURL"), "BiggerProfileImageURL does not match."); 218 | assertEquals(user.getMiniProfileImageURL(), struct.get("MiniProfileImageURL"), "MiniProfileImageURL does not match."); 219 | assertEquals(user.getOriginalProfileImageURL(), struct.get("OriginalProfileImageURL"), "OriginalProfileImageURL does not match."); 220 | assertEquals(user.getProfileImageURLHttps(), struct.get("ProfileImageURLHttps"), "ProfileImageURLHttps does not match."); 221 | 
assertEquals(user.getBiggerProfileImageURLHttps(), struct.get("BiggerProfileImageURLHttps"), "BiggerProfileImageURLHttps does not match."); 222 | assertEquals(user.getMiniProfileImageURLHttps(), struct.get("MiniProfileImageURLHttps"), "MiniProfileImageURLHttps does not match."); 223 | assertEquals(user.getOriginalProfileImageURLHttps(), struct.get("OriginalProfileImageURLHttps"), "OriginalProfileImageURLHttps does not match."); 224 | assertEquals(user.isDefaultProfileImage(), struct.get("DefaultProfileImage"), "DefaultProfileImage does not match."); 225 | assertEquals(user.getURL(), struct.get("URL"), "URL does not match."); 226 | assertEquals(user.isProtected(), struct.get("Protected"), "Protected does not match."); 227 | assertEquals(user.getFollowersCount(), struct.get("FollowersCount"), "FollowersCount does not match."); 228 | assertEquals(user.getProfileBackgroundColor(), struct.get("ProfileBackgroundColor"), "ProfileBackgroundColor does not match."); 229 | assertEquals(user.getProfileTextColor(), struct.get("ProfileTextColor"), "ProfileTextColor does not match."); 230 | assertEquals(user.getProfileLinkColor(), struct.get("ProfileLinkColor"), "ProfileLinkColor does not match."); 231 | assertEquals(user.getProfileSidebarFillColor(), struct.get("ProfileSidebarFillColor"), "ProfileSidebarFillColor does not match."); 232 | assertEquals(user.getProfileSidebarBorderColor(), struct.get("ProfileSidebarBorderColor"), "ProfileSidebarBorderColor does not match."); 233 | assertEquals(user.isProfileUseBackgroundImage(), struct.get("ProfileUseBackgroundImage"), "ProfileUseBackgroundImage does not match."); 234 | assertEquals(user.isDefaultProfile(), struct.get("DefaultProfile"), "DefaultProfile does not match."); 235 | assertEquals(user.isShowAllInlineMedia(), struct.get("ShowAllInlineMedia"), "ShowAllInlineMedia does not match."); 236 | assertEquals(user.getFriendsCount(), struct.get("FriendsCount"), "FriendsCount does not match."); 237 | assertEquals(user.getCreatedAt(), 
struct.get("CreatedAt"), "CreatedAt does not match."); 238 | assertEquals(user.getFavouritesCount(), struct.get("FavouritesCount"), "FavouritesCount does not match."); 239 | assertEquals(user.getUtcOffset(), struct.get("UtcOffset"), "UtcOffset does not match."); 240 | assertEquals(user.getTimeZone(), struct.get("TimeZone"), "TimeZone does not match."); 241 | assertEquals(user.getProfileBackgroundImageURL(), struct.get("ProfileBackgroundImageURL"), "ProfileBackgroundImageURL does not match."); 242 | assertEquals(user.getProfileBackgroundImageUrlHttps(), struct.get("ProfileBackgroundImageUrlHttps"), "ProfileBackgroundImageUrlHttps does not match."); 243 | assertEquals(user.getProfileBannerURL(), struct.get("ProfileBannerURL"), "ProfileBannerURL does not match."); 244 | assertEquals(user.getProfileBannerRetinaURL(), struct.get("ProfileBannerRetinaURL"), "ProfileBannerRetinaURL does not match."); 245 | assertEquals(user.getProfileBannerIPadURL(), struct.get("ProfileBannerIPadURL"), "ProfileBannerIPadURL does not match."); 246 | assertEquals(user.getProfileBannerIPadRetinaURL(), struct.get("ProfileBannerIPadRetinaURL"), "ProfileBannerIPadRetinaURL does not match."); 247 | assertEquals(user.getProfileBannerMobileURL(), struct.get("ProfileBannerMobileURL"), "ProfileBannerMobileURL does not match."); 248 | assertEquals(user.getProfileBannerMobileRetinaURL(), struct.get("ProfileBannerMobileRetinaURL"), "ProfileBannerMobileRetinaURL does not match."); 249 | assertEquals(user.isProfileBackgroundTiled(), struct.get("ProfileBackgroundTiled"), "ProfileBackgroundTiled does not match."); 250 | assertEquals(user.getLang(), struct.get("Lang"), "Lang does not match."); 251 | assertEquals(user.getStatusesCount(), struct.get("StatusesCount"), "StatusesCount does not match."); 252 | assertEquals(user.isGeoEnabled(), struct.get("GeoEnabled"), "GeoEnabled does not match."); 253 | assertEquals(user.isVerified(), struct.get("Verified"), "Verified does not match."); 254 | 
assertEquals(user.isTranslator(), struct.get("Translator"), "Translator does not match."); 255 | assertEquals(user.getListedCount(), struct.get("ListedCount"), "ListedCount does not match."); 256 | assertEquals(user.isFollowRequestSent(), struct.get("FollowRequestSent"), "FollowRequestSent does not match."); 257 | } 258 | 259 | void assertKey(Status status, Struct struct) { 260 | assertEquals(status.getId(), struct.get("Id"), "Id does not match."); 261 | } 262 | 263 | @Test 264 | public void convertStatus() { 265 | Status status = mockStatus(); 266 | Struct struct = new Struct(StatusConverter.STATUS_SCHEMA); 267 | StatusConverter.convert(status, struct); 268 | assertStatus(status, struct); 269 | } 270 | 271 | @Test 272 | public void convertUser() { 273 | User user = mockUser(); 274 | Struct struct = new Struct(StatusConverter.USER_SCHEMA); 275 | StatusConverter.convert(user, struct); 276 | assertUser(user, struct); 277 | } 278 | 279 | @Test 280 | public void convertPlace() { 281 | Place place = mockPlace(); 282 | Struct struct = new Struct(StatusConverter.PLACE_SCHEMA); 283 | StatusConverter.convert(place, struct); 284 | assertPlace(place, struct); 285 | } 286 | 287 | @Test 288 | public void convertGeoLocation() { 289 | GeoLocation geoLocation = mockGeoLocation(); 290 | Struct struct = new Struct(StatusConverter.GEO_LOCATION_SCHEMA); 291 | StatusConverter.convert(geoLocation, struct); 292 | assertGeoLocation(geoLocation, struct); 293 | } 294 | 295 | @Test 296 | public void convertStatusKey() { 297 | Status status = mockStatus(); 298 | Struct struct = new Struct(StatusConverter.STATUS_SCHEMA_KEY); 299 | StatusConverter.convertKey(status, struct); 300 | assertKey(status, struct); 301 | } 302 | 303 | void assertStatusDeletionNotice(StatusDeletionNotice statusDeletionNotice, Struct struct) { 304 | assertEquals(statusDeletionNotice.getStatusId(), struct.get("StatusId"), "StatusId does not match."); 305 | assertEquals(statusDeletionNotice.getUserId(), 
struct.get("UserId"), "UserId does not match."); 306 | } 307 | 308 | void assertStatusDeletionNoticeKey(StatusDeletionNotice statusDeletionNotice, Struct struct) { 309 | assertEquals(statusDeletionNotice.getStatusId(), struct.get("StatusId"), "StatusId does not match."); 310 | } 311 | 312 | @Test 313 | public void convertStatusDeletionNotice() { 314 | StatusDeletionNotice statusDeletionNotice = mockStatusDeletionNotice(); 315 | Struct struct = new Struct(StatusConverter.SCHEMA_STATUS_DELETION_NOTICE); 316 | StatusConverter.convert(statusDeletionNotice, struct); 317 | assertStatusDeletionNotice(statusDeletionNotice, struct); 318 | } 319 | 320 | @Test 321 | public void convertKeyStatusDeletionNotice() { 322 | StatusDeletionNotice statusDeletionNotice = mockStatusDeletionNotice(); 323 | Struct struct = new Struct(StatusConverter.SCHEMA_STATUS_DELETION_NOTICE_KEY); 324 | StatusConverter.convertKey(statusDeletionNotice, struct); 325 | assertStatusDeletionNoticeKey(statusDeletionNotice, struct); 326 | } 327 | } 328 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceConnectorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.jcustenborder.kafka.connect.twitter; 17 | 18 | 19 | import com.google.common.base.Joiner; 20 | import com.google.common.collect.ImmutableMap; 21 | import org.junit.jupiter.api.BeforeEach; 22 | import org.junit.jupiter.api.DynamicTest; 23 | import org.junit.jupiter.api.TestFactory; 24 | 25 | import java.util.ArrayList; 26 | import java.util.Arrays; 27 | import java.util.LinkedHashMap; 28 | import java.util.List; 29 | import java.util.Map; 30 | import java.util.stream.Stream; 31 | 32 | import static org.junit.jupiter.api.Assertions.assertEquals; 33 | import static org.junit.jupiter.api.DynamicTest.dynamicTest; 34 | 35 | public class TwitterSourceConnectorTest { 36 | 37 | TwitterSourceConnector connector; 38 | Map defaultSettings; 39 | 40 | @BeforeEach 41 | public void setup() { 42 | this.connector = new TwitterSourceConnector(); 43 | this.defaultSettings = new LinkedHashMap<>(); 44 | this.defaultSettings.put(TwitterSourceConnectorConfig.TWITTER_OAUTH_ACCESS_TOKEN_CONF, "xxxxxx"); 45 | this.defaultSettings.put(TwitterSourceConnectorConfig.TWITTER_OAUTH_SECRET_KEY_CONF, "xxxxxx"); 46 | this.defaultSettings.put(TwitterSourceConnectorConfig.TWITTER_OAUTH_CONSUMER_KEY_CONF, "xxxxxx"); 47 | this.defaultSettings.put(TwitterSourceConnectorConfig.TWITTER_OAUTH_ACCESS_TOKEN_SECRET_CONF, "xxxxxx"); 48 | this.defaultSettings.put(TwitterSourceConnectorConfig.KAFKA_STATUS_TOPIC_CONF, "xxxxxx"); 49 | this.defaultSettings.put(TwitterSourceConnectorConfig.PROCESS_DELETES_CONF, "false"); 50 | 51 | } 52 | 53 | List> expectedSettings(List... 
keywords) { 54 | List> result = new ArrayList<>(); 55 | for (List keywordSet : keywords) { 56 | Map settings = new LinkedHashMap<>(this.defaultSettings); 57 | settings.put(TwitterSourceConnectorConfig.FILTER_KEYWORDS_CONF, Joiner.on(',').join(keywordSet)); 58 | result.add(settings); 59 | } 60 | return result; 61 | } 62 | 63 | @TestFactory 64 | public Stream taskConfigs() { 65 | 66 | Map>> testCases = ImmutableMap.of( 67 | 1, expectedSettings(Arrays.asList("one", "two", "three")), 68 | 2, expectedSettings(Arrays.asList("one", "three"), Arrays.asList("two")), 69 | 3, expectedSettings(Arrays.asList("one"), Arrays.asList("two"), Arrays.asList("three")) 70 | ); 71 | 72 | return testCases.entrySet().stream() 73 | .map(e -> dynamicTest(e.getKey().toString(), () -> { 74 | this.defaultSettings.put(TwitterSourceConnectorConfig.FILTER_KEYWORDS_CONF, "one,two,three"); 75 | this.connector.start(this.defaultSettings); 76 | List> taskConfigs = this.connector.taskConfigs(e.getKey()); 77 | assertEquals( 78 | e.getValue(), 79 | taskConfigs 80 | ); 81 | })); 82 | 83 | 84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /src/test/java/com/github/jcustenborder/kafka/connect/twitter/TwitterSourceTaskTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.jcustenborder.kafka.connect.twitter;


import org.junit.jupiter.api.Test;

/**
 * Placeholder test class for the source task; its single test has an empty body.
 */
public class TwitterSourceTaskTest {

  /**
   * Performs no assertions, so it passes whenever it runs.
   */
  @Test
  public void test() {
    // Intentionally empty: nothing is exercised here.
  }
}

--------------------------------------------------------------------------------
/src/test/resources/logback.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
--------------------------------------------------------------------------------