├── .github └── workflows │ ├── snyk.yml │ ├── snyk_test.yml │ └── test_and_deploy.yml ├── .gitignore ├── .scalafmt.conf ├── .snyk ├── CHANGELOG ├── LICENSE-2.0.txt ├── README.md ├── benchmark ├── build.sbt └── src │ └── test │ └── scala │ └── com.snowplowanalytics.snowplow.analytics.scalasdk │ └── benchmark │ ├── OrderedBenchmark.scala │ └── ToTsvBenchmark.scala ├── build.sbt ├── project ├── BuildSettings.scala ├── Dependencies.scala ├── build.properties ├── plugins.sbt └── travis-deploy-key.enc └── src ├── main ├── scala-2 │ └── com.snowplowanalytics.snowplow.analytics.scalasdk │ │ └── decode │ │ ├── Parser.scala │ │ └── RowDecoderCompanion.scala ├── scala-3 │ └── com.snowplowanalytics.snowplow.analytics.scalasdk │ │ └── decode │ │ ├── Parser.scala │ │ └── RowDecoderCompanion.scala └── scala │ └── com.snowplowanalytics.snowplow.analytics.scalasdk │ ├── Common.scala │ ├── Data.scala │ ├── Event.scala │ ├── ParsingError.scala │ ├── SnowplowEvent.scala │ ├── decode │ ├── RowDecoder.scala │ ├── TSVParser.scala │ ├── ValueDecoder.scala │ └── package.scala │ ├── encode │ └── TsvEncoder.scala │ └── validate │ └── package.scala ├── site-preprocess └── index.html └── test └── scala └── com.snowplowanalytics.snowplow.analytics.scalasdk ├── EventGen.scala ├── EventSpec.scala ├── ParsingErrorSpec.scala ├── SnowplowEventSpec.scala └── decode └── ValueDecoderSpec.scala /.github/workflows/snyk.yml: -------------------------------------------------------------------------------- 1 | name: Snyk 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | 7 | jobs: 8 | check-vulnerabilities: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: Snyk monitor - Check for vulnerabilities 15 | uses: snyk/actions/scala@master 16 | with: 17 | command: monitor 18 | args: --project-name=analytics-sdk-scala 19 | env: 20 | SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/snyk_test.yml: -------------------------------------------------------------------------------- 1 | name: Snyk 2 | 3 | on: push 4 | 5 | 6 | jobs: 7 | check-vulnerabilities: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v2 12 | 13 | - name: Snyk monitor - Check for vulnerabilities 14 | uses: snyk/actions/scala@master 15 | with: 16 | command: test 17 | env: 18 | SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/test_and_deploy.yml: -------------------------------------------------------------------------------- 1 | name: Test and deploy 2 | 3 | on: push 4 | 5 | jobs: 6 | test_and_deploy: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | 12 | - name: Set up JDK 11 13 | uses: actions/setup-java@v1 14 | with: 15 | java-version: 11 16 | 17 | - name: Run tests 18 | run: sbt 'set coverageEnabled := true' +test 19 | 20 | - name: Aggregate coverage data 21 | run: sbt coverageAggregate 22 | 23 | - name: Check formatting 24 | run: sbt scalafmtCheck 25 | 26 | - name: Publish to Maven central 27 | if: startsWith(github.ref, 'refs/tags/') 28 | env: 29 | PGP_PASSPHRASE: ${{ secrets.SONA_PGP_PASSPHRASE }} 30 | PGP_SECRET: ${{ secrets.SONA_PGP_SECRET }} 31 | SONATYPE_USERNAME: ${{ secrets.SONA_USER }} 32 | SONATYPE_PASSWORD: ${{ secrets.SONA_PASS }} 33 | run: sbt ci-release 34 | 35 | - name: Generate API website 36 | if: startsWith(github.ref, 'refs/tags/') 37 | run: sbt makeSite 38 | 39 | - name: Publish 
website 40 | if: startsWith(github.ref, 'refs/tags/') 41 | run: | 42 | echo Publishing Scaladoc 43 | git fetch 44 | git checkout gh-pages 45 | cp -r target/site/* . 46 | git config user.name "GitHub Actions" 47 | git config user.email "<>" 48 | git add index.html $project_version 49 | git commit -m "Added Scaladoc for $project_version" 50 | git push origin gh-pages 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | dist/* 6 | target/ 7 | lib_managed/ 8 | src_managed/ 9 | project/boot/ 10 | project/plugins/project/ 11 | 12 | # Vagrant 13 | .vagrant 14 | VERSION 15 | .bloop 16 | .metals 17 | metals.sbt 18 | .vscode 19 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "2.6.1" 2 | style = default 3 | align.preset = none 4 | align.openParenCallSite = true 5 | align.arrowEnumeratorGenerator = true 6 | maxColumn = 140 7 | docstrings = JavaDoc 8 | optIn.breakChainOnFirstMethodDot = true 9 | spaces.afterKeywordBeforeParen = true 10 | continuationIndent.callSite = 2 11 | continuationIndent.defnSite = 2 12 | verticalMultiline.atDefnSite = true 13 | verticalMultiline.arityThreshold = 3 14 | verticalMultiline.newlineAfterOpenParen = true 15 | verticalMultiline.newlineBeforeImplicitKW = true 16 | verticalMultiline.excludeDanglingParens = [] 17 | importSelectors = noBinPack 18 | rewrite.rules = [ 19 | AsciiSortImports, 20 | RedundantBraces, 21 | RedundantParens, 22 | PreferCurlyFors 23 | ] 24 | runner.dialect = scala212 25 | 26 | fileOverride { 27 | "glob:**/scala-3/**/*.scala" { 28 | runner.dialect = dotty 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /.snyk: -------------------------------------------------------------------------------- 1 | ignore: 2 | 'SNYK-JAVA-ORGTYPELEVEL-2331743': 3 | - '*': 4 | reason: No fix available 5 | expires: 2022-10-01T17:33:45.004Z -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | Version 3.2.2 (2024-11-06) 2 | -------------------------- 3 | Fix for `_schema_version` overriding non-object entity data (#137) 4 | 5 | Version 3.2.1 (2024-01-10) 6 | -------------------------- 7 | Use only version for `_schema_version` context field (#134) 8 | 9 | Version 3.2.0 (2023-12-01) 10 | -------------------------- 11 | Parse Event from a ByteBuffer (#130) 12 | Add _schema_version field to each context during toShreddedJson transformation (#132) 13 | 14 | Version 3.1.0 (2023-03-17) 15 | -------------------------- 16 | Disable validation of field lengths when parsing event (#127) 17 | Build with Scala-3 (#124) 18 | 19 | Version 3.0.1 (2022-05-10) 20 | -------------------------- 21 | 3.0.0 was not published to maven (#121) 22 | 23 | Version 3.0.0 (2022-02-09) 24 | -------------------------- 25 | Event parser should fail on oversized fields (#115) 26 | Fix scoverage failures in github actions (#119) 27 | Update github workflows fix snyk vulnerability scanning (#120) 28 | 29 | Version 2.1.0 (2020-11-09) 30 | -------------------------- 31 | Update README to point to docs (#110) 32 | Add scalafmt (#112) 33 | Migrate from Travis to GH actions (#113) 34 | Integrate Snyk (#111) 35 | Add toTSV method 
(#97) 36 | Replace toJsonMap function with a lazy value (#107) 37 | Add benchmarking module (#108) 38 | 39 | Version 2.0.1 (2020-06-10) 40 | -------------------------- 41 | Fix Travis publish condition (#105) 42 | 43 | Version 2.0.0 (2020-06-09) 44 | -------------------------- 45 | Remove run manifest (#102) 46 | Add Scala 2.13 support (#101) 47 | Bump sbt-scoverage to 1.6.1 (#104) 48 | Bump Scala to 2.12.11 (#100) 49 | Bump sbt to 1.3.10 (#99) 50 | Bump iglu-core-circe to 1.0.0 (#98) 51 | 52 | Version 1.0.0 (2019-11-06) 53 | -------------------------- 54 | Make parsing errors type-safe (#75) 55 | Add function to create minimal event (#81) 56 | Deprecate run manifest (#86) 57 | Fix empty contexts and unstruct_event decoding bug (#92) 58 | Integrate MiMa (#87) 59 | Integrate scoverage (#90) 60 | Integrate sbt-gh-pages to create GH Pages from Scaladoc (#91) 61 | Remove Vagrant setup (#84) 62 | Add Travis CI secret key (#93) 63 | Add encryption label to .travis.yml (#94) 64 | Extend copyright notice to 2019 (#85) 65 | 66 | Version 0.4.2 (2019-08-06) 67 | -------------------------- 68 | Bump iglu-core to 0.5.1 (#73) 69 | Bump SBT to 1.2.8 (#74) 70 | Add decoder for Event (#78) 71 | Change Travis distribution to Trusty (#82) 72 | 73 | Version 0.4.1 (2018-03-05) 74 | -------------------------- 75 | Fix refr_dvce_tstamp field naming (#70) 76 | 77 | Version 0.4.0 (2018-02-13) 78 | -------------------------- 79 | Remove deprecated EventTransformer API (#68) 80 | Add type-safe Event API (#53) 81 | Bump Scala 2.11 to 2.11.12 in CI (#69) 82 | Bump Scala 2.12 to 2.12.8 (#66) 83 | Bump scalacheck to 1.14.0 (#65) 84 | Bump specs2 to 4.4.1 (#64) 85 | Bump aws-java-sdk to 1.11.490 (#63) 86 | Drop Scala 2.10 (#59) 87 | 88 | Version 0.3.2 (2018-08-24) 89 | -------------------------- 90 | Fix specification assuming unstruct_event field contains contexts envelope (#57) 91 | Fix non-flattening algorithm not returning inventory items (#58) 92 | 93 | Version 0.3.1 (2018-07-24) 94 | -------------------------- 95 | Add option to not flatten self-describing fields (#56) 96 | 97 | Version 0.3.0 (2018-03-06) 98 | -------------------------- 99 | Add Scala 2.12 support (#41) 100 | Bump json4s to 3.2.11 (#46) 101 | Bump aws-java-sdk to 1.11.289 (#48) 102 | Bump Scala 2.11 to 2.11.12 (#47) 103 | Bump SBT to 1.1.1 (#49) 104 | Extend copyright notice to 2018 (#51) 105 | Change se_value to Double (#52) 106 | 107 | Version 0.2.1 (2017-11-20) 108 | -------------------------- 109 | Fix non-merging matching contexts (#44) 110 | 111 | Version 0.2.0 (2017-05-24) 112 | -------------------------- 113 | Bump SBT to 0.13.15 (#32) 114 | Bump specs2 to 3.8.9 (#33) 115 | Add support for checking and setting a DynamoDB-backed run manifest (#31) 116 | Add transformWithInventory to the JSON EventTransformer (#34) 117 | JSON Event Transformer: don't add null unstruct_event and contexts fields to output (#11) 118 | Replace Scalaz Validation with Scala Either (#20) 119 | Use standard regular expression for schema URIs (#22) 120 | Allow empty custom contexts (#27) 121 | Add CI/CD to project (#18) 122 | Add Sonatype credentials to .travis.yml (#39) 123 | Add Bintray credentials to .travis.yml (#17) 124 | Update README markdown in according with CommonMark (#28) 125 | Migrate setup guide from README to dedicated snowplow/snowplow wiki page (#29) 126 | Migrate usage guide from README to dedicated snowplow/snowplow wiki page (#30) 127 | 128 | Version 0.1.1 (2016-07-27) 129 | -------------------------- 130 | Allow organisations in Iglu schema URIs 
to contain hyphens (#12) 131 | 132 | Version 0.1.0 (2016-03-22) 133 | -------------------------- 134 | Initial release 135 | -------------------------------------------------------------------------------- /LICENSE-2.0.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Snowplow Scala Analytics SDK 2 | 3 | [![Build Status][travis-image]][travis] 4 | [![Release][release-image]][releases] 5 | [![License][license-image]][license] 6 | 7 | ## Overview 8 | 9 | **[Snowplow][snowplow]** Analytics SDK for Scala lets you work with **[Snowplow enriched events][enriched-events]** in your Scala event processing and data modeling jobs. 10 | 11 | Use this SDK with **[Apache Spark][spark]**, **[AWS Lambda][lambda]**, **[Apache Flink][flink]**, **[Scalding][scalding]**, **[Apache Samza][samza]** and other Scala/JVM-compatible data processing frameworks. 12 | 13 | ## Documentation 14 | 15 | Setup guide and user guide can be found on our [documentation website](https://docs.snowplowanalytics.com/docs/modeling-your-data/analytics-sdk/analytics-sdk-scala/). 16 | 17 | Scaladoc for this project can be found as Github pages [here][scala-doc]. 18 | 19 | ## Benchmarking 20 | 21 | This project comes with [sbt-jmh](https://github.com/ktoso/sbt-jmh). 22 | 23 | Benchmarks need to be added [here](./benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/). 24 | 25 | They can be run with `sbt "project benchmark" "+jmh:run -i 10 -wi 3 -f2 -t3"`. 26 | 27 | To get details about the parameters `jmh:run -h`. 28 | 29 | ## Copyright and license 30 | 31 | The Snowplow Scala Analytics SDK is copyright 2016-2019 Snowplow Analytics Ltd. 32 | 33 | Licensed under the **[Apache License, Version 2.0][license]** (the "License"); 34 | you may not use this software except in compliance with the License. 35 | 36 | Unless required by applicable law or agreed to in writing, software 37 | distributed under the License is distributed on an "AS IS" BASIS, 38 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 39 | See the License for the specific language governing permissions and 40 | limitations under the License. 
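As a reference for the Benchmarking section above: a new benchmark is a plain JMH-annotated class placed next to the existing ones. Below is a minimal sketch modelled on the bundled `OrderedBenchmark`; the class name and the `toJson` call it measures are illustrative, and it reuses the `States.AtomicEventState` fixture defined in `OrderedBenchmark.scala`.

```scala
package com.snowplowanalytics.snowplow.analytics.scalasdk.benchmark

import java.util.concurrent.TimeUnit

import org.openjdk.jmh.annotations._

@State(Scope.Thread)
@BenchmarkMode(Array(Mode.AverageTime, Mode.Throughput))
@OutputTimeUnit(TimeUnit.MICROSECONDS)
class ToJsonBenchmark {
  @Benchmark
  def toJson(state: States.AtomicEventState): Unit = {
    // serialize the minimal event built once per trial in the shared fixture
    state.event.toJson(lossy = true)
    ()
  }
}
```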
41 | 42 | [travis-image]: https://travis-ci.org/snowplow/snowplow-scala-analytics-sdk.png?branch=master 43 | [travis]: http://travis-ci.org/snowplow/snowplow-scala-analytics-sdk 44 | 45 | [license-image]: http://img.shields.io/badge/license-Apache--2-blue.svg?style=flat 46 | [license]: http://www.apache.org/licenses/LICENSE-2.0 47 | 48 | [release-image]: http://img.shields.io/badge/release-3.2.2-blue.svg?style=flat 49 | [releases]: https://github.com/snowplow/snowplow-scala-analytics-sdk/releases 50 | 51 | [scala-doc]: http://snowplow.github.io/snowplow-scala-analytics-sdk/ 52 | 53 | [enriched-events]: https://docs.snowplowanalytics.com/docs/understanding-your-pipeline/canonical-event/ 54 | 55 | [spark]: http://spark.apache.org/ 56 | [lambda]: https://aws.amazon.com/lambda/ 57 | [flink]: https://flink.apache.org/ 58 | [scalding]: https://github.com/twitter/scalding 59 | [samza]: http://samza.apache.org/ 60 | -------------------------------------------------------------------------------- /benchmark/build.sbt: -------------------------------------------------------------------------------- 1 | sourceDirectory in Jmh := (sourceDirectory in Test).value 2 | classDirectory in Jmh := (classDirectory in Test).value 3 | dependencyClasspath in Jmh := (dependencyClasspath in Test).value 4 | // rewire tasks, so that 'jmh:run' automatically invokes 'jmh:compile' (otherwise a clean 'jmh:run' would fail) 5 | compile in Jmh := (compile in Jmh).dependsOn(compile in Test).value 6 | run in Jmh := (run in Jmh).dependsOn(Keys.compile in Jmh).evaluated -------------------------------------------------------------------------------- /benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/OrderedBenchmark.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk.benchmark 14 | 15 | import org.openjdk.jmh.annotations._ 16 | 17 | import java.util.concurrent.TimeUnit 18 | import java.util.UUID 19 | import java.time.Instant 20 | 21 | import com.snowplowanalytics.snowplow.analytics.scalasdk.Event 22 | 23 | @State(Scope.Thread) 24 | @BenchmarkMode(Array(Mode.AverageTime, Mode.Throughput)) 25 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 26 | class OrderedBenchmark { 27 | @Benchmark 28 | def ordered(state : States.AtomicEventState): Unit = { 29 | state.event.ordered 30 | } 31 | } 32 | 33 | object States { 34 | @State(Scope.Benchmark) 35 | class AtomicEventState { 36 | var event: Event = _ 37 | 38 | @Setup(Level.Trial) 39 | def init(): Unit = { 40 | val uuid = UUID.randomUUID() 41 | val timestamp = Instant.now() 42 | val vCollector = "2.0.0" 43 | val vTracker = "scala_0.7.0" 44 | event = Event.minimal(uuid, timestamp, vCollector, vTracker) 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/ToTsvBenchmark.scala: -------------------------------------------------------------------------------- 1 | package com.snowplowanalytics.snowplow.analytics.scalasdk.benchmark 2 | 3 | import org.openjdk.jmh.annotations._ 4 | 5 | import java.util.concurrent.TimeUnit 6 | import java.util.UUID 7 | import java.time.Instant 8 | 9 | import com.snowplowanalytics.snowplow.analytics.scalasdk.Event 10 | 11 | @State(Scope.Thread) 12 | @BenchmarkMode(Array(Mode.AverageTime, Mode.Throughput)) 13 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 14 | class ToTsvBenchmark { 15 | @Benchmark 16 | def toTsv(state : ToTsvBenchmark.AtomicEventState): Unit = { 17 | state.event.toTsv 18 | } 19 | } 20 | 21 | object ToTsvBenchmark { 22 | @State(Scope.Benchmark) 23 | class AtomicEventState { 24 | var event: Event = _ 25 | 26 | @Setup(Level.Trial) 27 | def init(): Unit = { 28 | val uuid = UUID.randomUUID() 29 | val timestamp = Instant.now() 30 | val vCollector = "2.0.0" 31 | val vTracker = "scala_0.7.0" 32 | event = Event.minimal(uuid, timestamp, vCollector, vTracker) 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | 14 | lazy val root = project 15 | .in(file(".")) 16 | .settings( 17 | Seq[Setting[_]]( 18 | name := "snowplow-scala-analytics-sdk", 19 | organization := "com.snowplowanalytics", 20 | description := "Scala analytics SDK for Snowplow", 21 | scalaVersion := "2.13.10", 22 | crossScalaVersions := Seq("2.12.17", "2.13.10", "3.2.1") 23 | ) 24 | ) 25 | .enablePlugins(SiteScaladocPlugin) 26 | .enablePlugins(PreprocessPlugin) 27 | .settings(BuildSettings.dynVerSettings) 28 | .settings(BuildSettings.buildSettings) 29 | .settings(BuildSettings.publishSettings) 30 | .settings(BuildSettings.mimaSettings) 31 | .settings(BuildSettings.scoverageSettings) 32 | .settings(BuildSettings.sbtSiteSettings) 33 | .settings(BuildSettings.formattingSettings) 34 | .settings( 35 | Seq( 36 | shellPrompt := { _ => name.value + " > " } 37 | ) 38 | ) 39 | .settings( 40 | libraryDependencies ++= Seq( 41 | // Scala 42 | Dependencies.igluCore, 43 | Dependencies.cats, 44 | Dependencies.circeParser, 45 | Dependencies.circeGeneric, 46 | // Scala (test only) 47 | Dependencies.specs2, 48 | Dependencies.specs2Scalacheck, 49 | Dependencies.scalacheck, 50 | Dependencies.circeLiteral 51 | ) 52 | ) 53 | 54 | lazy val benchmark = project 55 | .in(file("benchmark")) 56 | .dependsOn(root % "test->test") 57 | .enablePlugins(JmhPlugin) 58 | -------------------------------------------------------------------------------- /project/BuildSettings.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | 14 | // SBT 15 | import sbt._ 16 | import Keys._ 17 | 18 | 19 | // Mima plugin 20 | import com.typesafe.tools.mima.plugin.MimaKeys._ 21 | import com.typesafe.tools.mima.core.{ProblemFilters, DirectMissingMethodProblem} 22 | 23 | // Scoverage plugin 24 | import scoverage.ScoverageKeys._ 25 | 26 | 27 | import sbtdynver.DynVerPlugin.autoImport._ 28 | 29 | import com.typesafe.sbt.site.SitePlugin.autoImport._ 30 | import com.typesafe.sbt.site.SiteScaladocPlugin.autoImport._ 31 | import com.typesafe.sbt.site.preprocess.PreprocessPlugin.autoImport._ 32 | 33 | import org.scalafmt.sbt.ScalafmtPlugin.autoImport._ 34 | 35 | object BuildSettings { 36 | 37 | // Basic settings for our app 38 | lazy val buildSettings = Seq( 39 | scalacOptions ++= Seq( 40 | "-deprecation", 41 | "-encoding", "UTF-8", 42 | "-feature", 43 | "-unchecked" 44 | ), 45 | scalacOptions ++= { 46 | if (scalaVersion.value.startsWith("3")) { 47 | Seq("-Xmax-inlines", "150") 48 | } else { 49 | Seq( 50 | "-Ywarn-dead-code", 51 | "-Ywarn-numeric-widen", 52 | "-Ywarn-value-discard" 53 | ) 54 | } 55 | } 56 | ) 57 | 58 | lazy val dynVerSettings = Seq( 59 | ThisBuild / dynverVTagPrefix := false, // Otherwise git tags required to have v-prefix 60 | ThisBuild / dynverSeparator := "-" // to be compatible with docker 61 | ) 62 | 63 | lazy val publishSettings = Seq[Setting[_]]( 64 | publishArtifact := true, 65 | Test / publishArtifact := false, 66 | licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")), 67 | pomIncludeRepository := { _ => false }, 68 | homepage := Some(url("http://snowplowanalytics.com")), 69 | developers := List( 70 | Developer( 71 | "Snowplow Analytics Ltd", 72 | "Snowplow Analytics Ltd", 73 | "support@snowplowanalytics.com", 74 | url("https://snowplowanalytics.com") 75 | ) 76 | ) 77 | ) 78 | 79 | // If new version introduces breaking changes, clear out the lists of previous version. 80 | // Otherwise, add previous version to set without removing other versions. 
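  // For example (illustrative version numbers only): after releasing a
  // binary-compatible 3.0.2, the Scala 2 set below would become
  // Set("3.0.1", "3.0.2"), whereas a breaking 4.0.0 would clear both sets to Set().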
81 | val mimaPreviousVersionsScala2 = Set("3.0.1") 82 | val mimaPreviousVersionsScala3 = Set() 83 | lazy val mimaSettings = Seq( 84 | mimaPreviousArtifacts := { 85 | val versionsForBuild = 86 | CrossVersion.partialVersion(scalaVersion.value) match { 87 | case Some((3, _)) => 88 | mimaPreviousVersionsScala3 89 | case _ => 90 | mimaPreviousVersionsScala2 91 | } 92 | 93 | versionsForBuild.map { organization.value %% name.value % _ } 94 | }, 95 | ThisBuild / mimaFailOnNoPrevious := false, 96 | mimaBinaryIssueFilters ++= Seq( 97 | // DeriveParser should not have been public in previous versions 98 | ProblemFilters.exclude[DirectMissingMethodProblem]("com.snowplowanalytics.snowplow.analytics.scalasdk.decode.Parser#DeriveParser.get") 99 | ), 100 | Test / test := (Test / test).dependsOn(mimaReportBinaryIssues).value 101 | ) 102 | 103 | val scoverageSettings = Seq( 104 | coverageMinimumStmtTotal := 50, 105 | // Excluded because of shapeless, which would generate 1000x500KB statements driving coverage OOM 106 | coverageExcludedFiles := """.*\/Event.*;""", 107 | coverageFailOnMinimum := true, 108 | coverageHighlighting := false 109 | ) 110 | 111 | lazy val sbtSiteSettings = Seq( 112 | SiteScaladoc / siteSubdirName := s"${version.value}", 113 | Preprocess / preprocessVars := Map("VERSION" -> version.value) 114 | ) 115 | 116 | lazy val formattingSettings = Seq( 117 | scalafmtConfig := file(".scalafmt.conf"), 118 | scalafmtOnCompile := true 119 | ) 120 | 121 | } 122 | -------------------------------------------------------------------------------- /project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | import sbt._ 14 | 15 | object Dependencies { 16 | 17 | object V { 18 | val igluCore = "1.1.3" 19 | val cats = "2.9.0" 20 | val circe = "0.14.3" 21 | // Scala (test only) 22 | val specs2 = "4.19.0" 23 | val scalaCheck = "1.17.0" 24 | } 25 | 26 | val igluCore = "com.snowplowanalytics" %% "iglu-core-circe" % V.igluCore 27 | val cats = "org.typelevel" %% "cats-core" % V.cats 28 | val circeParser = "io.circe" %% "circe-parser" % V.circe 29 | val circeGeneric = "io.circe" %% "circe-generic" % V.circe 30 | // Scala (test only) 31 | val specs2 = "org.specs2" %% "specs2-core" % V.specs2 % Test 32 | val specs2Scalacheck = "org.specs2" %% "specs2-scalacheck" % V.specs2 % Test 33 | val scalacheck = "org.scalacheck" %% "scalacheck" % V.scalaCheck % Test 34 | val circeLiteral = "io.circe" %% "circe-literal" % V.circe % Test 35 | } 36 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.8.2 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "1.1.1") 2 | addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.0.7") 3 | addSbtPlugin("com.typesafe.sbt" % "sbt-site" % "1.4.0") 4 | addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "1.0.0") 5 | addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.0") 6 | addSbtPlugin("com.geirsson" % "sbt-ci-release" % "1.5.7") 7 | addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.6") 8 | addSbtPlugin("com.dwijnand" % "sbt-dynver" % "4.1.1") 9 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.10.0-RC1") 10 | 11 | libraryDependencySchemes += "org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always 12 | -------------------------------------------------------------------------------- /project/travis-deploy-key.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowplow/snowplow-scala-analytics-sdk/2ba728c6d1dfaca2242baabf515d73717adf5e99/project/travis-deploy-key.enc -------------------------------------------------------------------------------- /src/main/scala-2/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/Parser.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk.decode 14 | 15 | import cats.implicits._ 16 | import shapeless._ 17 | import shapeless.ops.record._ 18 | import shapeless.ops.hlist._ 19 | import cats.data.{NonEmptyList, Validated} 20 | import java.nio.ByteBuffer 21 | import scala.collection.mutable.ListBuffer 22 | import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError.{FieldNumberMismatch, NotTSV, RowDecodingError} 23 | 24 | private[scalasdk] trait Parser[A] extends TSVParser[A] { 25 | 26 | /** Heterogeneous TSV values */ 27 | type HTSV <: HList 28 | 29 | def expectedNumFields: Int 30 | 31 | /** Evidence allowing to transform TSV line into `HList` */ 32 | protected def decoder: RowDecoder[HTSV] 33 | 34 | /** Evidence that `A` is isomorphic to `HTSV` */ 35 | protected def generic: Generic.Aux[A, HTSV] 36 | 37 | def parse(row: String): DecodeResult[A] = { 38 | val values = row.split("\t", -1) 39 | if (values.length == 1) 40 | Validated.Invalid(NotTSV) 41 | else if (values.length != expectedNumFields) 42 | Validated.Invalid(FieldNumberMismatch(values.length)) 43 | else { 44 | val decoded = decoder(values.toList).leftMap(e => RowDecodingError(e)) 45 | decoded.map(decodedValue => generic.from(decodedValue)) 46 | } 47 | } 48 | 49 | def parseBytes(row: ByteBuffer): DecodeResult[A] = { 50 | val values = Parser.splitBuffer(row) 51 | if (values.length == 1) 52 | Validated.Invalid(NotTSV) 53 | else if (values.length != expectedNumFields) 54 | Validated.Invalid(FieldNumberMismatch(values.length)) 55 | else { 56 | val decoded = decoder.decodeBytes(values.result()).leftMap(e => RowDecodingError(e)) 57 | decoded.map(decodedValue => generic.from(decodedValue)) 58 | } 59 | } 60 | } 61 | 62 | object Parser { 63 | 64 | private val tab: Byte = '\t'.toByte 65 | 66 | private def splitBuffer(row: ByteBuffer): ListBuffer[ByteBuffer] = { 67 | var current = row.duplicate 68 | val builder = ListBuffer(current) 69 | (row.position() until row.limit()).foreach { i => 70 | if (row.get(i) === tab) { 71 | current.limit(i) 72 | current = row.duplicate.position(i + 1) 73 | builder += current 74 | } 75 | } 76 | builder 77 | } 78 | 79 | private[scalasdk] sealed trait DeriveParser[A] { 80 | 81 | def knownKeys[R <: HList, K <: HList, L <: HList]( 82 | implicit lgen: LabelledGeneric.Aux[A, R], 83 | keys: Keys.Aux[R, K], 84 | gen: Generic.Aux[A, L], 85 | toTraversableAux: ToTraversable.Aux[K, List, Symbol] 86 | ): List[String] = 87 | keys().toList.map(_.name) 88 | 89 | /** 90 | * Get instance of parser after all evidences are given 91 | * @tparam R full class representation with field names and types 92 | * @tparam K evidence of field names 93 | * @tparam L evidence of field types 94 | */ 95 | def get[R <: HList, K <: HList, L <: HList]( 96 | maxLengths: Map[String, Int] 97 | )( 98 | implicit lgen: LabelledGeneric.Aux[A, R], 99 | keys: Keys.Aux[R, K], 100 | gen: Generic.Aux[A, L], 101 | toTraversableAux: ToTraversable.Aux[K, List, Symbol], 102 | deriveRowDecoder: RowDecoder.DeriveRowDecoder[L] 103 | ): TSVParser[A] = 104 | new Parser[A] { 105 | type HTSV = L 106 | val keyList = keys().toList 107 | val expectedNumFields: Int = keyList.length 108 | val decoder: RowDecoder[L] = deriveRowDecoder.get(keyList, maxLengths) 109 | val generic: Generic.Aux[A, L] = gen 110 | } 111 | } 112 | 113 | /** Derive a TSV parser for `A` */ 114 | private[scalasdk] def deriveFor[A]: DeriveParser[A] = 115 | new DeriveParser[A] {} 116 | } 117 | 
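// Usage sketch (hypothetical `Pair` case class; `deriveFor` is private[scalasdk],
// so this only compiles from within the SDK itself). `maxLengths` caps the length
// of named fields during value decoding:
//
//   case class Pair(a: String, b: Option[Int])
//   val parser: TSVParser[Pair] = Parser.deriveFor[Pair].get(Map("a" -> 16))
//   parser.parse("hello\t42")    // Validated.Valid(Pair("hello", Some(42)))
//   parser.parse("no-tabs-here") // Validated.Invalid(NotTSV)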
-------------------------------------------------------------------------------- /src/main/scala-2/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/RowDecoderCompanion.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk.decode 14 | 15 | import shapeless._ 16 | import cats.syntax.validated._ 17 | import cats.syntax.either._ 18 | import cats.syntax.apply._ 19 | import java.nio.ByteBuffer 20 | import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError.RowDecodingErrorInfo.UnhandledRowDecodingError 21 | 22 | private[scalasdk] trait RowDecoderCompanion { 23 | import HList.ListCompat._ 24 | 25 | sealed trait DeriveRowDecoder[L] { 26 | def get(knownKeys: List[Key], maxLengths: Map[String, Int]): RowDecoder[L] 27 | } 28 | 29 | object DeriveRowDecoder { 30 | def apply[L](implicit fromRow: DeriveRowDecoder[L]): DeriveRowDecoder[L] = fromRow 31 | } 32 | 33 | /** Parse TSV row into HList */ 34 | private def parse[H: ValueDecoder, T <: HList]( 35 | key: Key, 36 | tailDecoder: RowDecoder[T], 37 | maxLength: Option[Int], 38 | row: List[String] 39 | ): RowDecodeResult[H :: T] = 40 | row match { 41 | case h :: t => 42 | val hv: RowDecodeResult[H] = ValueDecoder[H].parse(key, h, maxLength).toValidatedNel 43 | val tv: RowDecodeResult[T] = tailDecoder.apply(t) 44 | (hv, tv).mapN(_ :: _) 45 | case Nil => UnhandledRowDecodingError("Not enough values, format is invalid").invalidNel 46 | } 47 | 48 | /** Parse TSV row into HList */ 49 | private def parseBytes[H: ValueDecoder, T <: HList]( 50 | key: Key, 51 | tailDecoder: RowDecoder[T], 52 | maxLength: Option[Int], 53 | row: List[ByteBuffer] 54 | ): RowDecodeResult[H :: T] = 55 | row match { 56 | case h :: t => 57 | val hv: RowDecodeResult[H] = ValueDecoder[H].parseBytes(key, h, maxLength).toValidatedNel 58 | val tv: RowDecodeResult[T] = tailDecoder.decodeBytes(t) 59 | (hv, tv).mapN(_ :: _) 60 | case Nil => UnhandledRowDecodingError("Not enough values, format is invalid").invalidNel 61 | } 62 | 63 | implicit def hnilFromRow: DeriveRowDecoder[HNil] = 64 | new DeriveRowDecoder[HNil] { 65 | def get(knownKeys: List[Key], maxLengths: Map[String, Int]): RowDecoder[HNil] = 66 | new RowDecoder[HNil] { 67 | def apply(row: List[String]): RowDecodeResult[HNil] = 68 | row match { 69 | case Nil => 70 | HNil.validNel 71 | case _ => 72 | UnhandledRowDecodingError("Not enough values, format is invalid").invalidNel 73 | } 74 | 75 | def decodeBytes(row: List[ByteBuffer]): RowDecodeResult[HNil] = 76 | row match { 77 | case Nil => 78 | HNil.validNel 79 | case _ => 80 | UnhandledRowDecodingError("Not enough values, format is invalid").invalidNel 81 | } 82 | } 83 | } 84 | 85 | implicit def hconsFromRow[H: ValueDecoder, T <: HList: 
DeriveRowDecoder]: DeriveRowDecoder[H :: T] = 86 | new DeriveRowDecoder[H :: T] { 87 | def get(knownKeys: List[Key], maxLengths: Map[String, Int]): RowDecoder[H :: T] = 88 | knownKeys match { 89 | case key :: tailKeys => 90 | val tailDecoder = DeriveRowDecoder.apply[T].get(tailKeys, maxLengths) 91 | val maxLength = maxLengths.get(key.name) 92 | new RowDecoder[H :: T] { 93 | def apply(row: List[String]): RowDecodeResult[H :: T] = parse(key, tailDecoder, maxLength, row) 94 | def decodeBytes(row: List[ByteBuffer]): RowDecodeResult[H :: T] = parseBytes(key, tailDecoder, maxLength, row) 95 | } 96 | case Nil => 97 | // Shapeless type checking makes this impossible 98 | throw new IllegalStateException 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/main/scala-3/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/Parser.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk.decode 14 | 15 | import cats.implicits._ 16 | import cats.data.{NonEmptyList, Validated} 17 | import java.nio.ByteBuffer 18 | import scala.collection.mutable.ListBuffer 19 | import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError.{FieldNumberMismatch, NotTSV, RowDecodingError} 20 | import scala.deriving._ 21 | import scala.compiletime._ 22 | 23 | private[scalasdk] trait Parser[A] extends TSVParser[A] { 24 | 25 | /** List of field names defined on `A` */ 26 | def expectedNumFields: Int 27 | 28 | protected def decoder: RowDecoder[A] 29 | 30 | def parse(row: String): DecodeResult[A] = { 31 | val values = row.split("\t", -1) 32 | if (values.length == 1) Validated.Invalid(NotTSV) 33 | else if (values.length != expectedNumFields) Validated.Invalid(FieldNumberMismatch(values.length)) 34 | else decoder(values.toList).leftMap(e => RowDecodingError(e)) 35 | } 36 | 37 | def parseBytes(row: ByteBuffer): DecodeResult[A] = { 38 | val values = Parser.splitBuffer(row) 39 | if (values.length == 1) Validated.Invalid(NotTSV) 40 | else if (values.length != expectedNumFields) Validated.Invalid(FieldNumberMismatch(values.length)) 41 | else decoder.decodeBytes(values.result()).leftMap(e => RowDecodingError(e)) 42 | } 43 | } 44 | 45 | object Parser { 46 | 47 | private val tab: Byte = '\t'.toByte 48 | 49 | private def splitBuffer(row: ByteBuffer): ListBuffer[ByteBuffer] = { 50 | var current = row.duplicate 51 | val builder = ListBuffer(current) 52 | (row.position() until row.limit()).foreach { i => 53 | if (row.get(i) === tab) { 54 | current.limit(i) 55 | current = row.duplicate.position(i + 1) 56 | builder += current 57 | } 58 | } 59 | builder 60 | } 61 | 62 | private[scalasdk] sealed trait DeriveParser[A] { 63 | inline def knownKeys(implicit 
mirror: Mirror.ProductOf[A]): List[String] = 64 | constValueTuple[mirror.MirroredElemLabels].toArray.map(_.toString).toList 65 | 66 | inline def get(maxLengths: Map[String, Int])(implicit mirror: Mirror.ProductOf[A]): TSVParser[A] = 67 | new Parser[A] { 68 | val knownKeys: List[Symbol] = constValueTuple[mirror.MirroredElemLabels].toArray.map(s => Symbol(s.toString)).toList 69 | val expectedNumFields = knownKeys.length 70 | val decoder: RowDecoder[A] = RowDecoder.DeriveRowDecoder.of[A].get(knownKeys, maxLengths) 71 | } 72 | } 73 | 74 | /** Derive a TSV parser for `A` */ 75 | private[scalasdk] def deriveFor[A]: DeriveParser[A] = 76 | new DeriveParser[A] {} 77 | } 78 | -------------------------------------------------------------------------------- /src/main/scala-3/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/RowDecoderCompanion.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk.decode 14 | 15 | import cats.syntax.validated._ 16 | import cats.syntax.either._ 17 | import cats.syntax.apply._ 18 | import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError.RowDecodingErrorInfo.UnhandledRowDecodingError 19 | import java.nio.ByteBuffer 20 | import scala.deriving._ 21 | import scala.compiletime._ 22 | 23 | private[scalasdk] trait RowDecoderCompanion { 24 | 25 | sealed trait DeriveRowDecoder[L] { self => 26 | def get(knownKeys: List[Key], maxLengths: Map[String, Int]): RowDecoder[L] 27 | 28 | def map[B](f: L => B): DeriveRowDecoder[B] = 29 | new DeriveRowDecoder[B] { 30 | def get(knownKeys: List[Key], maxLengths: Map[String, Int]): RowDecoder[B] = 31 | self.get(knownKeys, maxLengths).map(f) 32 | } 33 | } 34 | 35 | object DeriveRowDecoder { 36 | inline def of[L](implicit m: Mirror.ProductOf[L]): DeriveRowDecoder[L] = { 37 | val instance = summonInline[DeriveRowDecoder[m.MirroredElemTypes]] 38 | instance.map(tuple => m.fromTuple(tuple)) 39 | } 40 | } 41 | 42 | private def parse[H: ValueDecoder, T <: Tuple]( 43 | key: Key, 44 | tailDecoder: RowDecoder[T], 45 | maxLength: Option[Int], 46 | row: List[String] 47 | ): RowDecodeResult[H *: T] = 48 | row match { 49 | case h :: t => 50 | val hv: RowDecodeResult[H] = ValueDecoder[H].parse(key, h, maxLength).toValidatedNel 51 | val tv: RowDecodeResult[T] = tailDecoder.apply(t) 52 | (hv, tv).mapN(_ *: _) 53 | case Nil => UnhandledRowDecodingError("Not enough values, format is invalid").invalidNel 54 | } 55 | 56 | private def parseBytes[H: ValueDecoder, T <: Tuple]( 57 | key: Key, 58 | tailDecoder: RowDecoder[T], 59 | maxLength: Option[Int], 60 | row: List[ByteBuffer] 61 | ): RowDecodeResult[H *: T] = 62 | row match { 63 | case h :: t => 64 | val hv: RowDecodeResult[H] = ValueDecoder[H].parseBytes(key, h, 
maxLength).toValidatedNel 65 | val tv: RowDecodeResult[T] = tailDecoder.decodeBytes(t) 66 | (hv, tv).mapN(_ *: _) 67 | case Nil => UnhandledRowDecodingError("Not enough values, format is invalid").invalidNel 68 | } 69 | 70 | implicit def hnilFromRow: DeriveRowDecoder[EmptyTuple] = 71 | new DeriveRowDecoder[EmptyTuple] { 72 | def get(knownKeys: List[Key], maxLengths: Map[String, Int]): RowDecoder[EmptyTuple] = 73 | new RowDecoder[EmptyTuple] { 74 | def apply(row: List[String]): RowDecodeResult[EmptyTuple] = 75 | row match { 76 | case Nil => 77 | EmptyTuple.validNel 78 | case _ => 79 | UnhandledRowDecodingError("Not enough values, format is invalid").invalidNel 80 | } 81 | 82 | def decodeBytes(row: List[ByteBuffer]): RowDecodeResult[EmptyTuple] = 83 | row match { 84 | case Nil => 85 | EmptyTuple.validNel 86 | case _ => 87 | UnhandledRowDecodingError("Not enough values, format is invalid").invalidNel 88 | } 89 | } 90 | } 91 | 92 | implicit def hconsFromRow[H: ValueDecoder, T <: Tuple: DeriveRowDecoder]: DeriveRowDecoder[H *: T] = 93 | new DeriveRowDecoder[H *: T] { 94 | def get(knownKeys: List[Key], maxLengths: Map[String, Int]): RowDecoder[H *: T] = 95 | knownKeys match { 96 | case key :: tailKeys => 97 | val tailDecoder = summon[DeriveRowDecoder[T]].get(tailKeys, maxLengths) 98 | val maxLength = maxLengths.get(key.name) 99 | new RowDecoder[H *: T] { 100 | def apply(row: List[String]): RowDecodeResult[H *: T] = parse(key, tailDecoder, maxLength, row) 101 | def decodeBytes(row: List[ByteBuffer]): RowDecodeResult[H *: T] = parseBytes(key, tailDecoder, maxLength, row) 102 | } 103 | case Nil => 104 | // Shapeless type checking makes this impossible 105 | throw new IllegalStateException 106 | } 107 | } 108 | 109 | } 110 | -------------------------------------------------------------------------------- /src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Common.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk 14 | 15 | import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer} 16 | 17 | object Common { 18 | 19 | val UnstructEventCriterion = 20 | SchemaCriterion("com.snowplowanalytics.snowplow", "unstruct_event", "jsonschema", 1, 0) 21 | 22 | val ContextsCriterion = 23 | SchemaCriterion("com.snowplowanalytics.snowplow", "contexts", "jsonschema", 1, 0) 24 | 25 | val UnstructEventUri = 26 | SchemaKey("com.snowplowanalytics.snowplow", "unstruct_event", "jsonschema", SchemaVer.Full(1, 0, 0)) 27 | 28 | val ContextsUri = 29 | SchemaKey("com.snowplowanalytics.snowplow", "contexts", "jsonschema", SchemaVer.Full(1, 0, 0)) 30 | } 31 | -------------------------------------------------------------------------------- /src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Data.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk 14 | 15 | import com.snowplowanalytics.iglu.core.SchemaKey 16 | 17 | /** 18 | * Common data types for enriched event 19 | */ 20 | object Data { 21 | 22 | /** 23 | * The type (contexts/derived_contexts/unstruct_event) and Iglu URI of a shredded type 24 | */ 25 | case class ShreddedType(shredProperty: ShredProperty, schemaKey: SchemaKey) 26 | 27 | /** 28 | * Known contexts types of enriched event 29 | */ 30 | sealed trait ContextsType { 31 | def field: String 32 | } 33 | 34 | case object DerivedContexts extends ContextsType { 35 | def field = "derived_contexts" 36 | } 37 | 38 | case object CustomContexts extends ContextsType { 39 | def field = "contexts" 40 | } 41 | 42 | /** 43 | * Field types of enriched event that can be shredded (self-describing JSONs) 44 | */ 45 | sealed trait ShredProperty { 46 | 47 | /** 48 | * Canonical field name 49 | */ 50 | def name: String 51 | 52 | /** 53 | * Result output prefix 54 | */ 55 | def prefix: String 56 | } 57 | 58 | case class Contexts(contextType: ContextsType) extends ShredProperty { 59 | def name = contextType.field 60 | def prefix = "contexts_" 61 | } 62 | 63 | case object UnstructEvent extends ShredProperty { 64 | def name = "unstruct_event" 65 | def prefix = name + "_" 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 
6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk 14 | 15 | // java 16 | import java.time.Instant 17 | import java.util.UUID 18 | import java.time.format.DateTimeFormatter 19 | import java.nio.ByteBuffer 20 | 21 | // circe 22 | import io.circe.{Decoder, Encoder, Json, JsonObject} 23 | 24 | import io.circe.generic.semiauto._ 25 | import io.circe.syntax._ 26 | 27 | // iglu 28 | import com.snowplowanalytics.iglu.core.SelfDescribingData 29 | import com.snowplowanalytics.iglu.core.circe.implicits._ 30 | 31 | // This library 32 | import com.snowplowanalytics.snowplow.analytics.scalasdk.decode.{DecodeResult, Key, Parser, TSVParser} 33 | import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.{Contexts, UnstructEvent} 34 | import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent._ 35 | import com.snowplowanalytics.snowplow.analytics.scalasdk.validate.FIELD_SIZES 36 | import com.snowplowanalytics.snowplow.analytics.scalasdk.encode.TsvEncoder 37 | 38 | /** 39 | * Case class representing a canonical Snowplow event. 40 | * 41 | * @see https://docs.snowplowanalytics.com/docs/understanding-your-pipeline/canonical-event/ 42 | */ 43 | // format: off 44 | case class Event( 45 | app_id: Option[String], 46 | platform: Option[String], 47 | etl_tstamp: Option[Instant], 48 | collector_tstamp: Instant, 49 | dvce_created_tstamp: Option[Instant], 50 | event: Option[String], 51 | event_id: UUID, 52 | txn_id: Option[Int], 53 | name_tracker: Option[String], 54 | v_tracker: Option[String], 55 | v_collector: String, 56 | v_etl: String, 57 | user_id: Option[String], 58 | user_ipaddress: Option[String], 59 | user_fingerprint: Option[String], 60 | domain_userid: Option[String], 61 | domain_sessionidx: Option[Int], 62 | network_userid: Option[String], 63 | geo_country: Option[String], 64 | geo_region: Option[String], 65 | geo_city: Option[String], 66 | geo_zipcode: Option[String], 67 | geo_latitude: Option[Double], 68 | geo_longitude: Option[Double], 69 | geo_region_name: Option[String], 70 | ip_isp: Option[String], 71 | ip_organization: Option[String], 72 | ip_domain: Option[String], 73 | ip_netspeed: Option[String], 74 | page_url: Option[String], 75 | page_title: Option[String], 76 | page_referrer: Option[String], 77 | page_urlscheme: Option[String], 78 | page_urlhost: Option[String], 79 | page_urlport: Option[Int], 80 | page_urlpath: Option[String], 81 | page_urlquery: Option[String], 82 | page_urlfragment: Option[String], 83 | refr_urlscheme: Option[String], 84 | refr_urlhost: Option[String], 85 | refr_urlport: Option[Int], 86 | refr_urlpath: Option[String], 87 | refr_urlquery: Option[String], 88 | refr_urlfragment: Option[String], 89 | refr_medium: Option[String], 90 | refr_source: Option[String], 91 | refr_term: Option[String], 92 | mkt_medium: Option[String], 93 | mkt_source: Option[String], 94 | mkt_term: Option[String], 95 | mkt_content: Option[String], 96 | mkt_campaign: Option[String], 97 | contexts: Contexts, 98 | se_category: Option[String], 99 | se_action:
Option[String], 100 | se_label: Option[String], 101 | se_property: Option[String], 102 | se_value: Option[Double], 103 | unstruct_event: UnstructEvent, 104 | tr_orderid: Option[String], 105 | tr_affiliation: Option[String], 106 | tr_total: Option[Double], 107 | tr_tax: Option[Double], 108 | tr_shipping: Option[Double], 109 | tr_city: Option[String], 110 | tr_state: Option[String], 111 | tr_country: Option[String], 112 | ti_orderid: Option[String], 113 | ti_sku: Option[String], 114 | ti_name: Option[String], 115 | ti_category: Option[String], 116 | ti_price: Option[Double], 117 | ti_quantity: Option[Int], 118 | pp_xoffset_min: Option[Int], 119 | pp_xoffset_max: Option[Int], 120 | pp_yoffset_min: Option[Int], 121 | pp_yoffset_max: Option[Int], 122 | useragent: Option[String], 123 | br_name: Option[String], 124 | br_family: Option[String], 125 | br_version: Option[String], 126 | br_type: Option[String], 127 | br_renderengine: Option[String], 128 | br_lang: Option[String], 129 | br_features_pdf: Option[Boolean], 130 | br_features_flash: Option[Boolean], 131 | br_features_java: Option[Boolean], 132 | br_features_director: Option[Boolean], 133 | br_features_quicktime: Option[Boolean], 134 | br_features_realplayer: Option[Boolean], 135 | br_features_windowsmedia: Option[Boolean], 136 | br_features_gears: Option[Boolean], 137 | br_features_silverlight: Option[Boolean], 138 | br_cookies: Option[Boolean], 139 | br_colordepth: Option[String], 140 | br_viewwidth: Option[Int], 141 | br_viewheight: Option[Int], 142 | os_name: Option[String], 143 | os_family: Option[String], 144 | os_manufacturer: Option[String], 145 | os_timezone: Option[String], 146 | dvce_type: Option[String], 147 | dvce_ismobile: Option[Boolean], 148 | dvce_screenwidth: Option[Int], 149 | dvce_screenheight: Option[Int], 150 | doc_charset: Option[String], 151 | doc_width: Option[Int], 152 | doc_height: Option[Int], 153 | tr_currency: Option[String], 154 | tr_total_base: Option[Double], 155 | tr_tax_base: Option[Double], 156 | tr_shipping_base: Option[Double], 157 | ti_currency: Option[String], 158 | ti_price_base: Option[Double], 159 | base_currency: Option[String], 160 | geo_timezone: Option[String], 161 | mkt_clickid: Option[String], 162 | mkt_network: Option[String], 163 | etl_tags: Option[String], 164 | dvce_sent_tstamp: Option[Instant], 165 | refr_domain_userid: Option[String], 166 | refr_dvce_tstamp: Option[Instant], 167 | derived_contexts: Contexts, 168 | domain_sessionid: Option[String], 169 | derived_tstamp: Option[Instant], 170 | event_vendor: Option[String], 171 | event_name: Option[String], 172 | event_format: Option[String], 173 | event_version: Option[String], 174 | event_fingerprint: Option[String], 175 | true_tstamp: Option[Instant] 176 | ) { 177 | // format: on 178 | 179 | /** 180 | * Extracts metadata from the event containing information about the types and Iglu URIs of its shred properties 181 | */ 182 | def inventory: Set[Data.ShreddedType] = { 183 | val unstructEvent = unstruct_event.data.toSet 184 | .map((ue: SelfDescribingData[Json]) => Data.ShreddedType(Data.UnstructEvent, ue.schema)) 185 | 186 | val derivedContexts = derived_contexts.data.toSet 187 | .map((ctx: SelfDescribingData[Json]) => Data.ShreddedType(Data.Contexts(Data.DerivedContexts), ctx.schema)) 188 | 189 | val customContexts = contexts.data.toSet 190 | .map((ctx: SelfDescribingData[Json]) => Data.ShreddedType(Data.Contexts(Data.CustomContexts), ctx.schema)) 191 | 192 | customContexts ++ derivedContexts ++ unstructEvent 193 | } 194 | 195 | /** 196 
| * Returns the event as a map of keys to Circe JSON values, while dropping inventory fields 197 | */ 198 | def atomic: Map[String, Json] = jsonMap - "contexts" - "unstruct_event" - "derived_contexts" 199 | 200 | /** 201 | * Returns the event as a list of key/Circe JSON value pairs. 202 | * Unlike `jsonMap` and `atomic`, these keys use the ordering of the canonical event model 203 | */ 204 | def ordered: List[(String, Option[Json])] = 205 | Event.fieldNames.map(key => (key, jsonMap.get(key))) 206 | 207 | /** 208 | * Returns a compound JSON field containing information about an event's latitude and longitude, 209 | * or None if one of these fields doesn't exist 210 | */ 211 | def geoLocation: Option[(String, Json)] = 212 | for { 213 | lat <- geo_latitude 214 | lon <- geo_longitude 215 | } yield "geo_location" -> s"$lat,$lon".asJson 216 | 217 | /** 218 | * Transforms the event to a validated JSON whose keys are the field names corresponding to the 219 | * EnrichedEvent POJO of the Scala Common Enrich project. If the lossy argument is true, any 220 | * self-describing events in the fields (unstruct_event, contexts and derived_contexts) are returned 221 | * in a "shredded" format (e.g. "unstruct_event_com_acme_1_myField": "value"), otherwise a standard 222 | * self-describing format is used. 223 | * 224 | * @param lossy Whether unstruct_event, contexts and derived_contexts should be flattened 225 | */ 226 | def toJson(lossy: Boolean): Json = 227 | if (lossy) 228 | JsonObject 229 | .fromMap( 230 | atomic ++ contexts.toShreddedJson.toMap ++ derived_contexts.toShreddedJson.toMap ++ unstruct_event.toShreddedJson.toMap ++ geoLocation 231 | ) 232 | .asJson 233 | else 234 | this.asJson 235 | 236 | /** Create the TSV representation of this event. */ 237 | def toTsv: String = TsvEncoder.encode(this) 238 | 239 | /** 240 | * This event as a map of keys to Circe JSON values 241 | */ 242 | private lazy val jsonMap: Map[String, Json] = this.asJsonObject.toMap 243 | } 244 | 245 | object Event { 246 | 247 | @deprecated("Event.unsafe functionality is merged into Event.parse method", "3.1.0") 248 | object unsafe { 249 | implicit def unsafeEventDecoder: Decoder[Event] = Event.eventDecoder 250 | } 251 | 252 | /** 253 | * Automatically derived Circe encoder 254 | */ 255 | implicit val jsonEncoder: Encoder.AsObject[Event] = deriveEncoder[Event] 256 | 257 | implicit def eventDecoder: Decoder[Event] = deriveDecoder[Event] 258 | 259 | /** 260 | * Derive a TSV parser for the Event class 261 | * 262 | * @param truncateAtomicFields A map from field names, e.g. "app_id", to maximum string lengths. 263 | * If supplied, the event fields are truncated to not exceed these maximum values. 264 | * 265 | * @note Enrich already performs atomic field length validation since version 3.0.0. Only supply 266 | * a non-empty map if atomic field lengths are important to you, and either you are parsing 267 | * events generated by an older version of enrich, or you run enrich with the 268 | * `featureFlags.acceptInvalid` config option on.
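 | * | * A minimal usage sketch (illustrative, not taken from this repo's docs): `tsvLine` is assumed | * to be a complete enriched-event TSV line, and the two limits shown mirror entries in | * validate.FIELD_SIZES ("app_id" -> 255, "mkt_clickid" -> 128): | * {{{ | * val parser = Event.parser(Map("app_id" -> 255, "mkt_clickid" -> 128)) | * parser.parse(tsvLine) match { | *   case cats.data.Validated.Valid(event)   => println(event.event_id) | *   case cats.data.Validated.Invalid(error) => println(error) | * } | * }}}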
269 | */ 270 | def parser(truncateAtomicFields: Map[String, Int] = Map.empty): TSVParser[Event] = 271 | Parser.deriveFor[Event].get(truncateAtomicFields) 272 | 273 | private lazy val stdParser: TSVParser[Event] = parser() 274 | 275 | /** 276 | * Converts a string with an enriched event TSV to an Event instance, 277 | * or a ValidatedNel containing information about errors 278 | * 279 | * @param line Enriched event TSV line 280 | */ 281 | def parse(line: String): DecodeResult[Event] = 282 | stdParser.parse(line) 283 | 284 | def parseBytes(bytes: ByteBuffer): DecodeResult[Event] = 285 | stdParser.parseBytes(bytes) 286 | 287 | private lazy val fieldNames: List[String] = 288 | Parser.deriveFor[Event].knownKeys 289 | 290 | /** 291 | * Creates an event with only required fields. 292 | * All optional fields are set to [[None]]. 293 | */ 294 | def minimal( 295 | id: UUID, 296 | collectorTstamp: Instant, 297 | vCollector: String, 298 | vEtl: String 299 | ): Event = 300 | Event( 301 | None, 302 | None, 303 | None, 304 | collectorTstamp, 305 | None, 306 | None, 307 | id, 308 | None, 309 | None, 310 | None, 311 | vCollector, 312 | vEtl, 313 | None, 314 | None, 315 | None, 316 | None, 317 | None, 318 | None, 319 | None, 320 | None, 321 | None, 322 | None, 323 | None, 324 | None, 325 | None, 326 | None, 327 | None, 328 | None, 329 | None, 330 | None, 331 | None, 332 | None, 333 | None, 334 | None, 335 | None, 336 | None, 337 | None, 338 | None, 339 | None, 340 | None, 341 | None, 342 | None, 343 | None, 344 | None, 345 | None, 346 | None, 347 | None, 348 | None, 349 | None, 350 | None, 351 | None, 352 | None, 353 | Contexts(Nil), 354 | None, 355 | None, 356 | None, 357 | None, 358 | None, 359 | UnstructEvent(None), 360 | None, 361 | None, 362 | None, 363 | None, 364 | None, 365 | None, 366 | None, 367 | None, 368 | None, 369 | None, 370 | None, 371 | None, 372 | None, 373 | None, 374 | None, 375 | None, 376 | None, 377 | None, 378 | None, 379 | None, 380 | None, 381 | None, 382 | None, 383 | None, 384 | None, 385 | None, 386 | None, 387 | None, 388 | None, 389 | None, 390 | None, 391 | None, 392 | None, 393 | None, 394 | None, 395 | None, 396 | None, 397 | None, 398 | None, 399 | None, 400 | None, 401 | None, 402 | None, 403 | None, 404 | None, 405 | None, 406 | None, 407 | None, 408 | None, 409 | None, 410 | None, 411 | None, 412 | None, 413 | None, 414 | None, 415 | None, 416 | None, 417 | None, 418 | None, 419 | None, 420 | None, 421 | None, 422 | None, 423 | Contexts(Nil), 424 | None, 425 | None, 426 | None, 427 | None, 428 | None, 429 | None, 430 | None, 431 | None 432 | ) 433 | } 434 | -------------------------------------------------------------------------------- /src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingError.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk 14 | 15 | import cats.data.NonEmptyList 16 | import cats.syntax.either._ 17 | import io.circe._ 18 | import io.circe.syntax._ 19 | import com.snowplowanalytics.snowplow.analytics.scalasdk.decode.Key 20 | import com.snowplowanalytics.snowplow.analytics.scalasdk.decode.Key._ 21 | 22 | /** 23 | * Represents an error raised when parsing a TSV line. 24 | */ 25 | sealed trait ParsingError extends Product with Serializable 26 | 27 | object ParsingError { 28 | 29 | /** 30 | * Represents an error indicating a non-TSV line. 31 | */ 32 | case object NotTSV extends ParsingError 33 | 34 | /** 35 | * Represents an error indicating the number of actual fields is not equal 36 | * to the number of expected fields. 37 | * @param fieldCount The number of fields in the TSV line. 38 | */ 39 | final case class FieldNumberMismatch(fieldCount: Int) extends ParsingError 40 | 41 | /** 42 | * Represents an error raised when trying to decode the values in a line. 43 | * @param errors A non-empty list of errors encountered when trying to decode the values. 44 | */ 45 | final case class RowDecodingError(errors: NonEmptyList[RowDecodingErrorInfo]) extends ParsingError 46 | 47 | /** 48 | * Contains information about the reasons behind errors raised when trying to decode the values in a line. 49 | */ 50 | sealed trait RowDecodingErrorInfo extends Product with Serializable 51 | 52 | object RowDecodingErrorInfo { 53 | 54 | /** 55 | * Represents cases where the value in a field is not valid, 56 | * e.g. an invalid timestamp, an invalid UUID, etc. 57 | * @param key The name of the field. 58 | * @param value The value of the field. 59 | * @param message The error message. 60 | */ 61 | final case class InvalidValue( 62 | key: Key, 63 | value: String, 64 | message: String 65 | ) extends RowDecodingErrorInfo 66 | 67 | /** 68 | * Represents unhandled errors raised when trying to decode a line. 69 | * For example, while parsing a list of tuples to [[HList]] in 70 | * [[RowDecoder]], type checking should make it impossible to get more or fewer values 71 | * than expected. 72 | * @param message The error message.
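 | * @example A hypothetical JSON rendering (the shape follows the circe encoder defined below): | * {{{ | * {"type": "UnhandledRowDecodingError", "message": "Not enough values, format is invalid"} | * }}}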
73 | */ 74 | final case class UnhandledRowDecodingError(message: String) extends RowDecodingErrorInfo 75 | 76 | implicit val analyticsSdkRowDecodingErrorInfoCirceEncoder: Encoder[RowDecodingErrorInfo] = 77 | Encoder.instance { 78 | case InvalidValue(key, value, message) => 79 | Json.obj( 80 | "type" := "InvalidValue", 81 | "key" := key.name, 82 | "value" := value, 83 | "message" := message 84 | ) 85 | case UnhandledRowDecodingError(message: String) => 86 | Json.obj( 87 | "type" := "UnhandledRowDecodingError", 88 | "message" := message 89 | ) 90 | } 91 | 92 | implicit val analyticsSdkRowDecodingErrorInfoCirceDecoder: Decoder[RowDecodingErrorInfo] = 93 | Decoder.instance { cursor => 94 | for { 95 | errorType <- cursor.downField("type").as[String] 96 | result <- errorType match { 97 | case "InvalidValue" => 98 | for { 99 | key <- cursor.downField("key").as[Key] 100 | value <- cursor.downField("value").as[String] 101 | message <- cursor.downField("message").as[String] 102 | } yield InvalidValue(key, value, message) 103 | 104 | case "UnhandledRowDecodingError" => 105 | cursor 106 | .downField("message") 107 | .as[String] 108 | .map(UnhandledRowDecodingError(_)) | // Guard against unknown types instead of throwing a MatchError, mirroring the ParsingError decoder below | case _ => | DecodingFailure(s"Error type $errorType is not an Analytics SDK row decoding error.", cursor.history).asLeft 109 | } 110 | } yield result 111 | } 112 | } 113 | 114 | implicit val analyticsSdkParsingErrorCirceEncoder: Encoder[ParsingError] = 115 | Encoder.instance { 116 | case NotTSV => 117 | Json.obj("type" := "NotTSV") 118 | case FieldNumberMismatch(fieldCount) => 119 | Json.obj( 120 | "type" := "FieldNumberMismatch", 121 | "fieldCount" := fieldCount 122 | ) 123 | case RowDecodingError(errors) => 124 | Json.obj( 125 | "type" := "RowDecodingError", 126 | "errors" := errors.asJson 127 | ) 128 | } 129 | 130 | implicit val analyticsSdkParsingErrorCirceDecoder: Decoder[ParsingError] = 131 | Decoder.instance { cursor => 132 | for { 133 | error <- cursor.downField("type").as[String] 134 | result <- error match { 135 | case "NotTSV" => 136 | NotTSV.asRight 137 | case "FieldNumberMismatch" => 138 | cursor 139 | .downField("fieldCount") 140 | .as[Int] 141 | .map(FieldNumberMismatch(_)) 142 | case "RowDecodingError" => 143 | cursor 144 | .downField("errors") 145 | .as[NonEmptyList[RowDecodingErrorInfo]] 146 | .map(RowDecodingError(_)) 147 | case _ => 148 | DecodingFailure(s"Error type $error is not an Analytics SDK Parsing Error.", cursor.history).asLeft 149 | } 150 | } yield result 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/SnowplowEvent.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk 14 | 15 | // circe 16 | import io.circe.syntax._ 17 | import io.circe.{Decoder, DecodingFailure, Encoder, Json, JsonObject} 18 | import io.circe.CursorOp.DownField 19 | 20 | // cats 21 | import cats.implicits._ 22 | 23 | // iglu 24 | import com.snowplowanalytics.iglu.core.circe.CirceIgluCodecs._ 25 | import com.snowplowanalytics.iglu.core.{SchemaKey, SelfDescribingData} 26 | 27 | object SnowplowEvent { 28 | 29 | /** 30 | * A JSON representation of an atomic event's unstruct_event field. 31 | * 32 | * @param data the unstruct event as self-describing JSON, or None if the field is missing 33 | */ 34 | case class UnstructEvent(data: Option[SelfDescribingData[Json]]) extends AnyVal { 35 | def toShreddedJson: Option[(String, Json)] = 36 | data.map { 37 | case SelfDescribingData(s, d) => 38 | (transformSchema(Data.UnstructEvent, s.vendor, s.name, s.version.model), d) 39 | } 40 | } 41 | 42 | implicit final val unstructCirceEncoder: Encoder[UnstructEvent] = 43 | Encoder.instance { unstructEvent => 44 | if (unstructEvent.data.isEmpty) Json.Null 45 | else 46 | JsonObject( 47 | ("schema", Common.UnstructEventUri.toSchemaUri.asJson), 48 | ("data", unstructEvent.data.asJson) 49 | ).asJson 50 | } 51 | 52 | implicit val unstructEventDecoder: Decoder[UnstructEvent] = Decoder.forProduct1("data")(UnstructEvent.apply).recover { 53 | case DecodingFailure(_, DownField("data") :: _) => UnstructEvent(None) 54 | } 55 | 56 | /** 57 | * A JSON representation of an atomic event's contexts or derived_contexts fields. 58 | * 59 | * @param data the contexts as a list of self-describing JSONs, empty if the field is missing 60 | */ 61 | case class Contexts(data: List[SelfDescribingData[Json]]) extends AnyVal { 62 | def toShreddedJson: Map[String, Json] = 63 | data.groupBy(x => (x.schema.vendor, x.schema.name, x.schema.format, x.schema.version.model)).map { 64 | case ((vendor, name, _, model), contextsSdd) => 65 | val transformedName = transformSchema(Data.Contexts(Data.CustomContexts), vendor, name, model) 66 | val transformedData = contextsSdd.map(addSchemaVersionToData).asJson 67 | (transformedName, transformedData) 68 | } 69 | } 70 | 71 | private def addSchemaVersionToData(contextSdd: SelfDescribingData[Json]): Json = 72 | if (contextSdd.data.isObject) { 73 | val version = Json.obj("_schema_version" -> contextSdd.schema.version.asString.asJson) 74 | contextSdd.data.deepMerge(version) 75 | } else contextSdd.data 76 | 77 | implicit final val contextsCirceEncoder: Encoder[Contexts] = 78 | Encoder.instance { contexts => 79 | if (contexts.data.isEmpty) JsonObject.empty.asJson 80 | else 81 | JsonObject( 82 | ("schema", Common.ContextsUri.toSchemaUri.asJson), 83 | ("data", contexts.data.asJson) 84 | ).asJson 85 | } 86 | 87 | implicit val contextsDecoder: Decoder[Contexts] = Decoder.forProduct1("data")(Contexts.apply).recover { 88 | case DecodingFailure(_, DownField("data") :: _) => Contexts(List()) 89 | } 90 | 91 | /** 92 | * @param shredProperty Type of self-describing entity 93 | * @param vendor Iglu schema vendor 94 | * @param name Iglu schema name 95 | * @param model Iglu schema model 96 | * @return the schema, transformed into an Elasticsearch-compatible column name 97 | */ 98 | def transformSchema( 99 | shredProperty: Data.ShredProperty, 100 | vendor: String, 101 | name: String, 102 | model: Int 103 | ): String = { 104 | // Convert dots & dashes in schema vendor to underscore 105 | val snakeCaseVendor = vendor.replaceAll("""[\.\-]""", "_").toLowerCase 106 | 
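 | // Illustrative (assumed) end-to-end example: Data.Contexts(Data.CustomContexts) with vendor | // "com.snowplowanalytics.snowplow", name "LinkClick" and model 1 | // produces "contexts_com_snowplowanalytics_snowplow_link_click_1"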
107 | // Convert PascalCase in schema name to snake_case 108 | val snakeCaseName = name.replaceAll("""[\.\-]""", "_").replaceAll("([^A-Z_])([A-Z])", "$1_$2").toLowerCase 109 | 110 | s"${shredProperty.prefix}${snakeCaseVendor}_${snakeCaseName}_$model" 111 | } 112 | 113 | def transformSchema(shredProperty: Data.ShredProperty, schema: SchemaKey): String = 114 | transformSchema(shredProperty, schema.vendor, schema.name, schema.version.model) 115 | } 116 | -------------------------------------------------------------------------------- /src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/RowDecoder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk 14 | package decode 15 | 16 | import java.nio.ByteBuffer 17 | 18 | private[scalasdk] trait RowDecoder[L] extends Serializable { self => 19 | def apply(row: List[String]): RowDecodeResult[L] 20 | def decodeBytes(row: List[ByteBuffer]): RowDecodeResult[L] 21 | def map[B](f: L => B): RowDecoder[B] = 22 | new RowDecoder[B] { 23 | def apply(row: List[String]): RowDecodeResult[B] = self.apply(row).map(f) 24 | def decodeBytes(row: List[ByteBuffer]): RowDecodeResult[B] = self.decodeBytes(row).map(f) 25 | } 26 | } 27 | 28 | object RowDecoder extends RowDecoderCompanion 29 | -------------------------------------------------------------------------------- /src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/TSVParser.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2023 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk.decode 14 | 15 | import java.nio.ByteBuffer 16 | 17 | /** Parser for a TSV-encoded string */ 18 | trait TSVParser[A] extends Serializable { 19 | def parseBytes(bytes: ByteBuffer): DecodeResult[A] 20 | def parse(row: String): DecodeResult[A] 21 | } 22 | -------------------------------------------------------------------------------- /src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk 14 | package decode 15 | 16 | // java 17 | import com.snowplowanalytics.snowplow.analytics.scalasdk.validate.FIELD_SIZES 18 | 19 | import java.time.Instant 20 | import java.time.format.DateTimeParseException 21 | import java.util.UUID 22 | import java.nio.ByteBuffer 23 | import java.nio.charset.StandardCharsets 24 | 25 | // cats 26 | import cats.syntax.either._ 27 | import cats.syntax.option._ 28 | import cats.syntax.show._ 29 | 30 | // iglu 31 | import com.snowplowanalytics.iglu.core.SelfDescribingData 32 | import com.snowplowanalytics.iglu.core.circe.implicits._ 33 | 34 | // circe 35 | import io.circe.jawn.JawnParser 36 | import io.circe.{Error, Json, ParsingFailure} 37 | 38 | // This library 39 | import com.snowplowanalytics.snowplow.analytics.scalasdk.Common.{ContextsCriterion, UnstructEventCriterion} 40 | import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.{Contexts, UnstructEvent} 41 | import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError.RowDecodingErrorInfo 42 | import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError.RowDecodingErrorInfo._ 43 | 44 | private[decode] trait ValueDecoder[A] { 45 | def parse( 46 | key: Key, 47 | value: String, 48 | maxLength: Option[Int] 49 | ): DecodedValue[A] 50 | 51 | def parseBytes( 52 | key: Key, 53 | value: ByteBuffer, 54 | maxLength: Option[Int] 55 | ): DecodedValue[A] = 56 | parse(key, StandardCharsets.UTF_8.decode(value).toString, maxLength) 57 | } 58 | 59 | private[decode] object ValueDecoder { 60 | 61 | private val parser: JawnParser = new JawnParser 62 | 63 | def apply[A](implicit readA: ValueDecoder[A]): ValueDecoder[A] = readA 64 | 65 | def fromFunc[A](f: ((Key, String, Option[Int])) => DecodedValue[A]): ValueDecoder[A] = 66 | new ValueDecoder[A] { 67 | def parse( 68 | key: Key, 69 | value: String, 70 | maxLength: Option[Int] 71 | ): DecodedValue[A] = f((key, value, maxLength)) 72 | } 73 | 74 | implicit final val stringColumnDecoder: ValueDecoder[String] = 75 | fromFunc[String] { 76 | case (key, value, Some(maxLength)) if value.length > maxLength => 77 | value.substring(0, maxLength).asRight 78 | case (key, "", _) => 79 | 
InvalidValue(key, "", s"Field ${key.name} cannot be empty").asLeft 80 | case (_, value, _) => 81 | value.asRight 82 | } 83 | 84 | implicit final val stringOptionColumnDecoder: ValueDecoder[Option[String]] = 85 | fromFunc[Option[String]] { 86 | case (key, value, Some(maxLength)) if value.length > maxLength => 87 | value.substring(0, maxLength).some.asRight 88 | case (_, "", _) => 89 | none[String].asRight 90 | case (_, value, _) => 91 | value.some.asRight 92 | 93 | } 94 | 95 | implicit final val intColumnDecoder: ValueDecoder[Option[Int]] = 96 | fromFunc[Option[Int]] { 97 | case (key, value, _) => 98 | if (value.isEmpty) none[Int].asRight 99 | else 100 | try value.toInt.some.asRight 101 | catch { 102 | case _: NumberFormatException => 103 | InvalidValue(key, value, s"Cannot parse key ${key.name} into integer").asLeft 104 | } 105 | } 106 | 107 | implicit final val uuidColumnDecoder: ValueDecoder[UUID] = 108 | fromFunc[UUID] { 109 | case (key, value, _) => 110 | if (value.isEmpty) 111 | InvalidValue(key, value, s"Field ${key.name} cannot be empty").asLeft 112 | else 113 | try UUID.fromString(value).asRight[RowDecodingErrorInfo] 114 | catch { 115 | case _: IllegalArgumentException => 116 | InvalidValue(key, value, s"Cannot parse key ${key.name} into UUID").asLeft 117 | } 118 | } 119 | 120 | implicit final val boolColumnDecoder: ValueDecoder[Option[Boolean]] = 121 | fromFunc[Option[Boolean]] { 122 | case (key, value, _) => 123 | value match { 124 | case "0" => false.some.asRight 125 | case "1" => true.some.asRight 126 | case "" => none[Boolean].asRight 127 | case _ => InvalidValue(key, value, s"Cannot parse key ${key.name} into boolean").asLeft 128 | } 129 | } 130 | 131 | implicit final val doubleColumnDecoder: ValueDecoder[Option[Double]] = 132 | fromFunc[Option[Double]] { 133 | case (key, value, _) => 134 | if (value.isEmpty) 135 | none[Double].asRight 136 | else 137 | try value.toDouble.some.asRight 138 | catch { 139 | case _: NumberFormatException => 140 | InvalidValue(key, value, s"Cannot parse key ${key.name} into double").asLeft 141 | } 142 | } 143 | 144 | implicit final val instantColumnDecoder: ValueDecoder[Instant] = 145 | fromFunc[Instant] { 146 | case (key, value, _) => 147 | if (value.isEmpty) 148 | InvalidValue(key, value, s"Field ${key.name} cannot be empty").asLeft 149 | else { 150 | val tstamp = reformatTstamp(value) 151 | try Instant.parse(tstamp).asRight 152 | catch { 153 | case _: DateTimeParseException => 154 | InvalidValue(key, value, s"Cannot parse key ${key.name} into datetime").asLeft 155 | } 156 | } 157 | } 158 | 159 | implicit final val instantOptionColumnDecoder: ValueDecoder[Option[Instant]] = 160 | fromFunc[Option[Instant]] { 161 | case (key, value, _) => 162 | if (value.isEmpty) 163 | none[Instant].asRight[RowDecodingErrorInfo] 164 | else { 165 | val tstamp = reformatTstamp(value) 166 | try Instant.parse(tstamp).some.asRight 167 | catch { 168 | case _: DateTimeParseException => 169 | InvalidValue(key, value, s"Cannot parse key ${key.name} into datetime").asLeft 170 | } 171 | } 172 | } 173 | 174 | implicit final val unstructuredJson: ValueDecoder[UnstructEvent] = { 175 | def fromJsonParseResult( 176 | result: Either[ParsingFailure, Json], 177 | key: Key, 178 | originalValue: => String 179 | ): DecodedValue[UnstructEvent] = { 180 | def asLeft(error: Error): RowDecodingErrorInfo = InvalidValue(key, originalValue, error.show) 181 | result 182 | .flatMap(_.as[SelfDescribingData[Json]]) 183 | .leftMap(asLeft) match { 184 | case Right(SelfDescribingData(schema, data)) if 
UnstructEventCriterion.matches(schema) => 185 | data.as[SelfDescribingData[Json]].leftMap(asLeft).map(_.some).map(UnstructEvent.apply) 186 | case Right(SelfDescribingData(schema, _)) => 187 | InvalidValue(key, originalValue, s"Unknown payload: ${schema.toSchemaUri}").asLeft[UnstructEvent] 188 | case Left(error) => error.asLeft[UnstructEvent] 189 | } 190 | } 191 | new ValueDecoder[UnstructEvent] { 192 | def parse( 193 | key: Key, 194 | value: String, 195 | maxLength: Option[Int] 196 | ): DecodedValue[UnstructEvent] = 197 | if (value.isEmpty) 198 | UnstructEvent(None).asRight[RowDecodingErrorInfo] 199 | else 200 | fromJsonParseResult(parser.parse(value), key, value) 201 | 202 | override def parseBytes( 203 | key: Key, 204 | value: ByteBuffer, 205 | maxLength: Option[Int] 206 | ): DecodedValue[UnstructEvent] = 207 | if (!value.hasRemaining()) 208 | UnstructEvent(None).asRight[RowDecodingErrorInfo] 209 | else 210 | fromJsonParseResult(parser.parseByteBuffer(value), key, StandardCharsets.UTF_8.decode(value).toString) 211 | } 212 | } 213 | 214 | implicit final val contexts: ValueDecoder[Contexts] = { 215 | def fromJsonParseResult( 216 | result: Either[ParsingFailure, Json], 217 | key: Key, 218 | originalValue: => String 219 | ): DecodedValue[Contexts] = { 220 | def asLeft(error: Error): RowDecodingErrorInfo = InvalidValue(key, originalValue, error.show) 221 | result 222 | .flatMap(_.as[SelfDescribingData[Json]]) 223 | .leftMap(asLeft) match { 224 | case Right(SelfDescribingData(schema, data)) if ContextsCriterion.matches(schema) => 225 | data.as[List[SelfDescribingData[Json]]].leftMap(asLeft).map(Contexts.apply) 226 | case Right(SelfDescribingData(schema, _)) => 227 | InvalidValue(key, originalValue, s"Unknown payload: ${schema.toSchemaUri}").asLeft[Contexts] 228 | case Left(error) => error.asLeft[Contexts] 229 | } 230 | } 231 | new ValueDecoder[Contexts] { 232 | def parse( 233 | key: Key, 234 | value: String, 235 | maxLength: Option[Int] 236 | ): DecodedValue[Contexts] = 237 | if (value.isEmpty) 238 | Contexts(List.empty).asRight[RowDecodingErrorInfo] 239 | else 240 | fromJsonParseResult(parser.parse(value), key, value) 241 | 242 | override def parseBytes( 243 | key: Key, 244 | value: ByteBuffer, 245 | maxLength: Option[Int] 246 | ): DecodedValue[Contexts] = 247 | if (!value.hasRemaining()) 248 | Contexts(List.empty).asRight[RowDecodingErrorInfo] 249 | else 250 | fromJsonParseResult(parser.parseByteBuffer(value), key, StandardCharsets.UTF_8.decode(value).toString) 251 | } 252 | } 253 | 254 | /** 255 | * Converts a timestamp to an ISO-8601 format usable by Instant.parse() 256 | * 257 | * @param tstamp Timestamp of the form YYYY-MM-DD hh:mm:ss 258 | * @return ISO-8601 timestamp 259 | */ 260 | private def reformatTstamp(tstamp: String): String = tstamp.replaceAll(" ", "T") + "Z" 261 | } 262 | -------------------------------------------------------------------------------- /src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 
7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk 14 | 15 | import cats.data.{Validated, ValidatedNel} 16 | import cats.syntax.either._ 17 | import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError.RowDecodingErrorInfo 18 | import io.circe.{Decoder, Encoder} 19 | import io.circe.syntax._ 20 | 21 | package object decode { 22 | 23 | /** Expected name of the field */ 24 | type Key = Symbol 25 | 26 | object Key { 27 | implicit val analyticsSdkKeyCirceEncoder: Encoder[Key] = 28 | Encoder.instance(_.name.asJson) // Symbol.toString varies across Scala versions ("'key" vs "Symbol(key)"), so encode the name directly 29 | 30 | implicit val analyticsSdkKeyCirceDecoder: Decoder[Key] = 31 | Decoder.instance(_.as[String].map(Symbol(_))) 32 | } 33 | 34 | /** Result of single-value parsing */ 35 | type DecodedValue[A] = Either[RowDecodingErrorInfo, A] 36 | 37 | /** Result of row decode process */ 38 | type RowDecodeResult[A] = ValidatedNel[RowDecodingErrorInfo, A] 39 | 40 | /** Result of TSV line parsing, which is either an event or parse error */ 41 | type DecodeResult[A] = Validated[ParsingError, A] 42 | } 43 | -------------------------------------------------------------------------------- /src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/encode/TsvEncoder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020-2020 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk.encode 14 | 15 | import java.time.format.DateTimeFormatter 16 | import java.time.Instant 17 | import java.util.UUID 18 | 19 | import io.circe.syntax._ 20 | 21 | import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent._ 22 | import com.snowplowanalytics.snowplow.analytics.scalasdk.Event 23 | 24 | object TsvEncoder { 25 | sealed trait FieldEncoder[T] { 26 | def encodeField(t: T): String 27 | } 28 | 29 | implicit object StringEncoder extends FieldEncoder[String] { 30 | def encodeField(str: String) = str 31 | } 32 | 33 | implicit object InstantEncoder extends FieldEncoder[Instant] { 34 | def encodeField(inst: Instant): String = 35 | DateTimeFormatter.ISO_INSTANT 36 | .format(inst) 37 | .replace("T", " ") 38 | .dropRight(1) // remove trailing 'Z' 39 | } 40 | 41 | implicit object UuidEncoder extends FieldEncoder[UUID] { 42 | def encodeField(uuid: UUID): String = uuid.toString 43 | } 44 | 45 | implicit object IntEncoder extends FieldEncoder[Int] { 46 | def encodeField(int: Int): String = int.toString 47 | } 48 | 49 | implicit object DoubleEncoder extends FieldEncoder[Double] { 50 | def encodeField(doub: Double): String = doub.toString 51 | } 52 | 53 | implicit object BooleanEncoder extends FieldEncoder[Boolean] { 54 | def encodeField(bool: Boolean): String = if (bool) "1" else "0" 55 | } 56 | 57 | implicit object ContextsEncoder extends FieldEncoder[Contexts] { 58 | def encodeField(ctxts: Contexts): String = 59 | if (ctxts.data.isEmpty) 60 | "" 61 | else 62 | ctxts.asJson.noSpaces 63 | } 64 | 65 | implicit object UnstructEncoder extends FieldEncoder[UnstructEvent] { 66 | def encodeField(unstruct: UnstructEvent): String = 67 | if (unstruct.data.isDefined) 68 | unstruct.asJson.noSpaces 69 | else 70 | "" 71 | } 72 | 73 | def encode[A](a: A)(implicit ev: FieldEncoder[A]): String = 74 | ev.encodeField(a) 75 | 76 | def encode[A](optA: Option[A])(implicit ev: FieldEncoder[A]): String = 77 | optA.map(a => ev.encodeField(a)).getOrElse("") 78 | 79 | def encode(event: Event): String = 80 | encode(event.app_id) + "\t" + 81 | encode(event.platform) + "\t" + 82 | encode(event.etl_tstamp) + "\t" + 83 | encode(event.collector_tstamp) + "\t" + 84 | encode(event.dvce_created_tstamp) + "\t" + 85 | encode(event.event) + "\t" + 86 | encode(event.event_id) + "\t" + 87 | encode(event.txn_id) + "\t" + 88 | encode(event.name_tracker) + "\t" + 89 | encode(event.v_tracker) + "\t" + 90 | encode(event.v_collector) + "\t" + 91 | encode(event.v_etl) + "\t" + 92 | encode(event.user_id) + "\t" + 93 | encode(event.user_ipaddress) + "\t" + 94 | encode(event.user_fingerprint) + "\t" + 95 | encode(event.domain_userid) + "\t" + 96 | encode(event.domain_sessionidx) + "\t" + 97 | encode(event.network_userid) + "\t" + 98 | encode(event.geo_country) + "\t" + 99 | encode(event.geo_region) + "\t" + 100 | encode(event.geo_city) + "\t" + 101 | encode(event.geo_zipcode) + "\t" + 102 | encode(event.geo_latitude) + "\t" + 103 | encode(event.geo_longitude) + "\t" + 104 | encode(event.geo_region_name) + "\t" + 105 | encode(event.ip_isp) + "\t" + 106 | encode(event.ip_organization) + "\t" + 107 | encode(event.ip_domain) + "\t" + 108 | encode(event.ip_netspeed) + "\t" + 109 | encode(event.page_url) + "\t" + 110 | encode(event.page_title) + "\t" + 111 | encode(event.page_referrer) + "\t" + 112 | encode(event.page_urlscheme) + "\t" + 113 | encode(event.page_urlhost) + "\t" + 114 | encode(event.page_urlport) + "\t" + 115 | encode(event.page_urlpath) + "\t" + 116 | 
encode(event.page_urlquery) + "\t" + 117 | encode(event.page_urlfragment) + "\t" + 118 | encode(event.refr_urlscheme) + "\t" + 119 | encode(event.refr_urlhost) + "\t" + 120 | encode(event.refr_urlport) + "\t" + 121 | encode(event.refr_urlpath) + "\t" + 122 | encode(event.refr_urlquery) + "\t" + 123 | encode(event.refr_urlfragment) + "\t" + 124 | encode(event.refr_medium) + "\t" + 125 | encode(event.refr_source) + "\t" + 126 | encode(event.refr_term) + "\t" + 127 | encode(event.mkt_medium) + "\t" + 128 | encode(event.mkt_source) + "\t" + 129 | encode(event.mkt_term) + "\t" + 130 | encode(event.mkt_content) + "\t" + 131 | encode(event.mkt_campaign) + "\t" + 132 | encode(event.contexts) + "\t" + 133 | encode(event.se_category) + "\t" + 134 | encode(event.se_action) + "\t" + 135 | encode(event.se_label) + "\t" + 136 | encode(event.se_property) + "\t" + 137 | encode(event.se_value) + "\t" + 138 | encode(event.unstruct_event) + "\t" + 139 | encode(event.tr_orderid) + "\t" + 140 | encode(event.tr_affiliation) + "\t" + 141 | encode(event.tr_total) + "\t" + 142 | encode(event.tr_tax) + "\t" + 143 | encode(event.tr_shipping) + "\t" + 144 | encode(event.tr_city) + "\t" + 145 | encode(event.tr_state) + "\t" + 146 | encode(event.tr_country) + "\t" + 147 | encode(event.ti_orderid) + "\t" + 148 | encode(event.ti_sku) + "\t" + 149 | encode(event.ti_name) + "\t" + 150 | encode(event.ti_category) + "\t" + 151 | encode(event.ti_price) + "\t" + 152 | encode(event.ti_quantity) + "\t" + 153 | encode(event.pp_xoffset_min) + "\t" + 154 | encode(event.pp_xoffset_max) + "\t" + 155 | encode(event.pp_yoffset_min) + "\t" + 156 | encode(event.pp_yoffset_max) + "\t" + 157 | encode(event.useragent) + "\t" + 158 | encode(event.br_name) + "\t" + 159 | encode(event.br_family) + "\t" + 160 | encode(event.br_version) + "\t" + 161 | encode(event.br_type) + "\t" + 162 | encode(event.br_renderengine) + "\t" + 163 | encode(event.br_lang) + "\t" + 164 | encode(event.br_features_pdf) + "\t" + 165 | encode(event.br_features_flash) + "\t" + 166 | encode(event.br_features_java) + "\t" + 167 | encode(event.br_features_director) + "\t" + 168 | encode(event.br_features_quicktime) + "\t" + 169 | encode(event.br_features_realplayer) + "\t" + 170 | encode(event.br_features_windowsmedia) + "\t" + 171 | encode(event.br_features_gears) + "\t" + 172 | encode(event.br_features_silverlight) + "\t" + 173 | encode(event.br_cookies) + "\t" + 174 | encode(event.br_colordepth) + "\t" + 175 | encode(event.br_viewwidth) + "\t" + 176 | encode(event.br_viewheight) + "\t" + 177 | encode(event.os_name) + "\t" + 178 | encode(event.os_family) + "\t" + 179 | encode(event.os_manufacturer) + "\t" + 180 | encode(event.os_timezone) + "\t" + 181 | encode(event.dvce_type) + "\t" + 182 | encode(event.dvce_ismobile) + "\t" + 183 | encode(event.dvce_screenwidth) + "\t" + 184 | encode(event.dvce_screenheight) + "\t" + 185 | encode(event.doc_charset) + "\t" + 186 | encode(event.doc_width) + "\t" + 187 | encode(event.doc_height) + "\t" + 188 | encode(event.tr_currency) + "\t" + 189 | encode(event.tr_total_base) + "\t" + 190 | encode(event.tr_tax_base) + "\t" + 191 | encode(event.tr_shipping_base) + "\t" + 192 | encode(event.ti_currency) + "\t" + 193 | encode(event.ti_price_base) + "\t" + 194 | encode(event.base_currency) + "\t" + 195 | encode(event.geo_timezone) + "\t" + 196 | encode(event.mkt_clickid) + "\t" + 197 | encode(event.mkt_network) + "\t" + 198 | encode(event.etl_tags) + "\t" + 199 | encode(event.dvce_sent_tstamp) + "\t" + 200 | encode(event.refr_domain_userid) 
+ "\t" + 201 | encode(event.refr_dvce_tstamp) + "\t" + 202 | encode(event.derived_contexts) + "\t" + 203 | encode(event.domain_sessionid) + "\t" + 204 | encode(event.derived_tstamp) + "\t" + 205 | encode(event.event_vendor) + "\t" + 206 | encode(event.event_name) + "\t" + 207 | encode(event.event_format) + "\t" + 208 | encode(event.event_version) + "\t" + 209 | encode(event.event_fingerprint) + "\t" + 210 | encode(event.true_tstamp) 211 | } 212 | -------------------------------------------------------------------------------- /src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/validate/package.scala: -------------------------------------------------------------------------------- 1 | package com.snowplowanalytics.snowplow.analytics.scalasdk 2 | 3 | package object validate { 4 | 5 | val FIELD_SIZES: Map[String, Int] = Map( 6 | "app_id" -> 255, 7 | "platform" -> 255, 8 | "event" -> 128, 9 | "event_id" -> 36, 10 | "name_tracker" -> 128, 11 | "v_tracker" -> 100, 12 | "v_collector" -> 100, 13 | "v_etl" -> 100, 14 | "user_id" -> 255, 15 | "user_ipaddress" -> 128, 16 | "user_fingerprint" -> 128, 17 | "domain_userid" -> 128, 18 | "network_userid" -> 128, 19 | "geo_country" -> 2, 20 | "geo_region" -> 3, 21 | "geo_city" -> 75, 22 | "geo_zipcode" -> 15, 23 | "geo_region_name" -> 100, 24 | "ip_isp" -> 100, 25 | "ip_organization" -> 128, 26 | "ip_domain" -> 128, 27 | "ip_netspeed" -> 100, 28 | "page_url" -> 4096, 29 | "page_title" -> 2000, 30 | "page_referrer" -> 4096, 31 | "page_urlscheme" -> 16, 32 | "page_urlhost" -> 255, 33 | "page_urlpath" -> 3000, 34 | "page_urlquery" -> 6000, 35 | "page_urlfragment" -> 3000, 36 | "refr_urlscheme" -> 16, 37 | "refr_urlhost" -> 255, 38 | "refr_urlpath" -> 6000, 39 | "refr_urlquery" -> 6000, 40 | "refr_urlfragment" -> 3000, 41 | "refr_medium" -> 25, 42 | "refr_source" -> 50, 43 | "refr_term" -> 255, 44 | "mkt_medium" -> 255, 45 | "mkt_source" -> 255, 46 | "mkt_term" -> 255, 47 | "mkt_content" -> 500, 48 | "mkt_campaign" -> 255, 49 | "se_category" -> 1000, 50 | "se_action" -> 1000, 51 | "se_label" -> 4096, 52 | "se_property" -> 1000, 53 | "tr_orderid" -> 255, 54 | "tr_affiliation" -> 255, 55 | "tr_city" -> 255, 56 | "tr_state" -> 255, 57 | "tr_country" -> 255, 58 | "ti_orderid" -> 255, 59 | "ti_sku" -> 255, 60 | "ti_name" -> 255, 61 | "ti_category" -> 255, 62 | "useragent" -> 1000, 63 | "br_name" -> 50, 64 | "br_family" -> 50, 65 | "br_version" -> 50, 66 | "br_type" -> 50, 67 | "br_renderengine" -> 50, 68 | "br_lang" -> 255, 69 | "br_colordepth" -> 12, 70 | "os_name" -> 50, 71 | "os_family" -> 50, 72 | "os_manufacturer" -> 50, 73 | "os_timezone" -> 255, 74 | "dvce_type" -> 50, 75 | "doc_charset" -> 128, 76 | "tr_currency" -> 3, 77 | "ti_currency" -> 3, 78 | "base_currency" -> 3, 79 | "geo_timezone" -> 64, 80 | "mkt_clickid" -> 128, 81 | "mkt_network" -> 64, 82 | "etl_tags" -> 500, 83 | "refr_domain_userid" -> 128, 84 | "domain_sessionid" -> 128, 85 | "event_vendor" -> 1000, 86 | "event_name" -> 1000, 87 | "event_format" -> 128, 88 | "event_version" -> 128, 89 | "event_fingerprint" -> 128 90 | ) 91 | 92 | private def validateStr( 93 | k: String, 94 | value: String 95 | ): List[String] = 96 | if (value.length > FIELD_SIZES.getOrElse(k, Int.MaxValue)) 97 | List(s"Field $k longer than maximum allowed size ${FIELD_SIZES.getOrElse(k, Int.MaxValue)}") 98 | else 99 | List.empty[String] 100 | 101 | private def validateStr( 102 | k: String, 103 | v: Option[String] 104 | ): List[String] = 105 | v match { 106 | case Some(value) => validateStr(k, value) 107 | case 
None => List.empty[String] 108 | } 109 | 110 | def validator(e: Event): List[String] = 111 | validateStr("app_id", e.app_id) ++ 112 | validateStr("platform", e.platform) ++ 113 | validateStr("event", e.event) ++ 114 | validateStr("name_tracker", e.name_tracker) ++ 115 | validateStr("v_tracker", e.v_tracker) ++ 116 | validateStr("v_collector", e.v_collector) ++ 117 | validateStr("v_etl", e.v_etl) ++ 118 | validateStr("user_id", e.user_id) ++ 119 | validateStr("user_ipaddress", e.user_ipaddress) ++ 120 | validateStr("user_fingerprint", e.user_fingerprint) ++ 121 | validateStr("domain_userid", e.domain_userid) ++ 122 | validateStr("network_userid", e.network_userid) ++ 123 | validateStr("geo_country", e.geo_country) ++ 124 | validateStr("geo_region", e.geo_region) ++ 125 | validateStr("geo_city", e.geo_city) ++ 126 | validateStr("geo_zipcode", e.geo_zipcode) ++ 127 | validateStr("geo_region_name", e.geo_region_name) ++ 128 | validateStr("ip_isp", e.ip_isp) ++ 129 | validateStr("ip_organization", e.ip_organization) ++ 130 | validateStr("ip_domain", e.ip_domain) ++ 131 | validateStr("ip_netspeed", e.ip_netspeed) ++ 132 | validateStr("page_url", e.page_url) ++ 133 | validateStr("page_title", e.page_title) ++ 134 | validateStr("page_referrer", e.page_referrer) ++ 135 | validateStr("page_urlscheme", e.page_urlscheme) ++ 136 | validateStr("page_urlhost", e.page_urlhost) ++ 137 | validateStr("page_urlpath", e.page_urlpath) ++ 138 | validateStr("page_urlquery", e.page_urlquery) ++ 139 | validateStr("page_urlfragment", e.page_urlfragment) ++ 140 | validateStr("refr_urlscheme", e.refr_urlscheme) ++ 141 | validateStr("refr_urlhost", e.refr_urlhost) ++ 142 | validateStr("refr_urlpath", e.refr_urlpath) ++ 143 | validateStr("refr_urlquery", e.refr_urlquery) ++ 144 | validateStr("refr_urlfragment", e.refr_urlfragment) ++ 145 | validateStr("refr_medium", e.refr_medium) ++ 146 | validateStr("refr_source", e.refr_source) ++ 147 | validateStr("refr_term", e.refr_term) ++ 148 | validateStr("mkt_medium", e.mkt_medium) ++ 149 | validateStr("mkt_source", e.mkt_source) ++ 150 | validateStr("mkt_term", e.mkt_term) ++ 151 | validateStr("mkt_content", e.mkt_content) ++ 152 | validateStr("mkt_campaign", e.mkt_campaign) ++ 153 | validateStr("se_category", e.se_category) ++ 154 | validateStr("se_action", e.se_action) ++ 155 | validateStr("se_label", e.se_label) ++ 156 | validateStr("se_property", e.se_property) ++ 157 | validateStr("tr_orderid", e.tr_orderid) ++ 158 | validateStr("tr_affiliation", e.tr_affiliation) ++ 159 | validateStr("tr_city", e.tr_city) ++ 160 | validateStr("tr_state", e.tr_state) ++ 161 | validateStr("tr_country", e.tr_country) ++ 162 | validateStr("ti_orderid", e.ti_orderid) ++ 163 | validateStr("ti_sku", e.ti_sku) ++ 164 | validateStr("ti_name", e.ti_name) ++ 165 | validateStr("ti_category", e.ti_category) ++ 166 | validateStr("useragent", e.useragent) ++ 167 | validateStr("br_name", e.br_name) ++ 168 | validateStr("br_family", e.br_family) ++ 169 | validateStr("br_version", e.br_version) ++ 170 | validateStr("br_type", e.br_type) ++ 171 | validateStr("br_renderengine", e.br_renderengine) ++ 172 | validateStr("br_lang", e.br_lang) ++ 173 | validateStr("br_colordepth", e.br_colordepth) ++ 174 | validateStr("os_name", e.os_name) ++ 175 | validateStr("os_family", e.os_family) ++ 176 | validateStr("os_manufacturer", e.os_manufacturer) ++ 177 | validateStr("os_timezone", e.os_timezone) ++ 178 | validateStr("dvce_type", e.dvce_type) ++ 179 | validateStr("doc_charset", e.doc_charset) ++ 180 | 
validateStr("tr_currency", e.tr_currency) ++ 181 | validateStr("ti_currency", e.ti_currency) ++ 182 | validateStr("base_currency", e.base_currency) ++ 183 | validateStr("geo_timezone", e.geo_timezone) ++ 184 | validateStr("mkt_clickid", e.mkt_clickid) ++ 185 | validateStr("mkt_network", e.mkt_network) ++ 186 | validateStr("etl_tags", e.etl_tags) ++ 187 | validateStr("refr_domain_userid", e.refr_domain_userid) ++ 188 | validateStr("domain_sessionid", e.domain_sessionid) ++ 189 | validateStr("event_vendor", e.event_vendor) ++ 190 | validateStr("event_name", e.event_name) ++ 191 | validateStr("event_format", e.event_format) ++ 192 | validateStr("event_version", e.event_version) ++ 193 | validateStr("event_fingerprint", e.event_fingerprint) 194 | 195 | } 196 | -------------------------------------------------------------------------------- /src/site-preprocess/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Project Documentation 6 | 15 | 16 | 17 | Go to the project documentation 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventGen.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk 14 | 15 | import com.snowplowanalytics.snowplow.analytics.scalasdk.validate.FIELD_SIZES 16 | import org.scalacheck.{Arbitrary, Gen} 17 | import io.circe._ 18 | import io.circe.syntax._ 19 | import io.circe.{Decoder, Encoder, HCursor, Json} 20 | import io.circe.parser._ 21 | 22 | import java.time.Instant 23 | 24 | object EventGen { 25 | import SnowplowEvent._ 26 | 27 | def strGen(n: Int, gen: Gen[Char]): Gen[String] = 28 | Gen.chooseNum(1, n).flatMap(len => Gen.listOfN(len, gen).map(_.mkString)) 29 | 30 | private val MaxTimestamp = 2871824840360L 31 | 32 | implicit val instantArbitrary: Arbitrary[Instant] = 33 | Arbitrary { 34 | for { 35 | seconds <- Gen.chooseNum(0L, MaxTimestamp) 36 | nanos <- Gen.chooseNum(Instant.MIN.getNano, Instant.MAX.getNano) 37 | } yield Instant.ofEpochMilli(seconds).plusNanos(nanos.toLong) 38 | } 39 | 40 | val instantGen: Gen[Instant] = 41 | Arbitrary.arbitrary[Instant] 42 | 43 | val ipv4Address: Gen[String] = 44 | for { 45 | a <- Gen.chooseNum(0, 255) 46 | b <- Gen.chooseNum(0, 255) 47 | c <- Gen.chooseNum(0, 255) 48 | d <- Gen.chooseNum(0, 255) 49 | } yield s"$a.$b.$c.$d" 50 | 51 | val ipv6Address: Gen[String] = 52 | for { 53 | a <- Arbitrary.arbitrary[Short] 54 | b <- Arbitrary.arbitrary[Short] 55 | c <- Arbitrary.arbitrary[Short] 56 | d <- Arbitrary.arbitrary[Short] 57 | e <- Arbitrary.arbitrary[Short] 58 | f <- Arbitrary.arbitrary[Short] 59 | g <- Arbitrary.arbitrary[Short] 60 | h <- Arbitrary.arbitrary[Short] 61 | } yield f"$a%x:$b%x:$c%x:$d%x:$e%x:$f%x:$g%x:$h%x" 62 | 63 | val ipAddress: Gen[String] = 64 | Gen.oneOf(ipv4Address, ipv6Address) 65 | 66 | val platform: Gen[String] = Gen.oneOf("web", "mob", "app") 67 | 68 | val eventType: Gen[String] = Gen.oneOf("page_view", "page_ping", "transaction", "unstruct") 69 | 70 | val kv: Gen[String] = for { 71 | key <- strGen(15, Gen.alphaNumChar) 72 | value <- strGen(30, Gen.alphaNumChar) 73 | } yield key + "=" + value 74 | val queryString: Gen[String] = Gen.nonEmptyContainerOf[List, String](kv).map(_.mkString("&")) 75 | 76 | val contexts: Contexts = parse(EventSpec.contextsJson) 77 | .flatMap(_.as[Contexts]) 78 | .getOrElse(throw new UnsupportedOperationException("can't decode contexts")) 79 | 80 | val unstruct: UnstructEvent = parse(EventSpec.unstructJson) 81 | .flatMap(_.as[UnstructEvent]) 82 | .getOrElse(throw new UnsupportedOperationException("can't decode unstructured event")) 83 | 84 | val derived_contexts: Contexts = parse(EventSpec.derivedContextsJson) 85 | .flatMap(_.as[Contexts]) 86 | .getOrElse(throw new UnsupportedOperationException("can't decode derived contexts")) 87 | 88 | val event: Gen[Event] = 89 | for { 90 | app_id <- Gen.option(strGen(FIELD_SIZES.getOrElse("app_id", Int.MaxValue), Gen.alphaNumChar)) 91 | platform <- Gen.option(platform) 92 | etl_tstamp <- Gen.option(instantGen) 93 | collector_tstamp <- instantGen 94 | dvce_created_tstamp <- Gen.option(instantGen) 95 | event <- Gen.option(eventType) 96 | event_id <- Gen.uuid 97 | txn_id <- Gen.option(Gen.chooseNum(1, 10000)) 98 | name_tracker <- Gen.option(strGen(FIELD_SIZES.getOrElse("name_tracker", Int.MaxValue), Gen.alphaNumChar)) 99 | v_tracker <- Gen.option(strGen(FIELD_SIZES.getOrElse("v_tracker", Int.MaxValue), Gen.alphaNumChar)) 100 | v_collector <- strGen(FIELD_SIZES.getOrElse("v_collector", Int.MaxValue), Gen.alphaNumChar) 101 | v_etl <- strGen(FIELD_SIZES.getOrElse("v_etl", Int.MaxValue), Gen.alphaNumChar) 102 | user_id <- 
Gen.option(Gen.uuid).map(_.map(_.toString())) 103 | user_ipaddress <- Gen.option(ipAddress) 104 | user_fingerprint <- Gen.option(strGen(FIELD_SIZES.getOrElse("user_fingerprint", Int.MaxValue), Gen.alphaNumChar)) 105 | domain_userid <- Gen.option(Gen.uuid).map(_.map(_.toString())) 106 | domain_sessionidx <- Gen.option(Gen.chooseNum(1, 10000)) 107 | network_userid <- Gen.option(Gen.uuid).map(_.map(_.toString())) 108 | geo_country <- Gen.option(strGen(FIELD_SIZES.getOrElse("geo_country", Int.MaxValue), Gen.alphaUpperChar)) 109 | geo_region <- Gen.option(strGen(FIELD_SIZES.getOrElse("geo_region", Int.MaxValue), Gen.alphaNumChar)) 110 | geo_city <- Gen.option(strGen(FIELD_SIZES.getOrElse("geo_city", Int.MaxValue), Gen.alphaChar)) 111 | geo_zipcode <- Gen.option(strGen(FIELD_SIZES.getOrElse("geo_zipcode", Int.MaxValue), Gen.alphaNumChar)) 112 | geo_latitude <- Gen.option(Arbitrary.arbitrary[Double]) 113 | geo_longitude <- Gen.option(Arbitrary.arbitrary[Double]) 114 | geo_region_name <- Gen.option(strGen(FIELD_SIZES.getOrElse("geo_region_name", Int.MaxValue), Gen.alphaChar)) 115 | ip_isp <- Gen.option(strGen(FIELD_SIZES.getOrElse("ip_isp", Int.MaxValue), Gen.alphaNumChar)) 116 | ip_organization <- Gen.option(strGen(FIELD_SIZES.getOrElse("ip_organization", Int.MaxValue), Gen.alphaNumChar)) 117 | ip_domain <- Gen.option(strGen(FIELD_SIZES.getOrElse("ip_domain", Int.MaxValue), Gen.alphaNumChar)) 118 | ip_netspeed <- Gen.option(strGen(FIELD_SIZES.getOrElse("ip_netspeed", Int.MaxValue), Gen.alphaNumChar)) 119 | page_url <- Gen.option(strGen(FIELD_SIZES.getOrElse("page_url", Int.MaxValue), Gen.alphaNumChar)) 120 | page_title <- Gen.option(strGen(FIELD_SIZES.getOrElse("page_title", Int.MaxValue), Gen.alphaNumChar)) 121 | page_referrer <- Gen.option(strGen(FIELD_SIZES.getOrElse("page_referrer", Int.MaxValue), Gen.alphaNumChar)) 122 | page_urlscheme <- Gen.option(Gen.oneOf("http", "https")) 123 | page_urlhost <- Gen.option(strGen(FIELD_SIZES.getOrElse("page_urlhost", Int.MaxValue), Gen.alphaNumChar)) 124 | page_urlport <- Gen.option(Gen.chooseNum(1, 65000)) 125 | page_urlpath <- Gen.option(strGen(FIELD_SIZES.getOrElse("page_urlpath", Int.MaxValue), Gen.alphaNumChar)) 126 | page_urlquery <- Gen.option(queryString) 127 | page_urlfragment <- Gen.option(strGen(FIELD_SIZES.getOrElse("page_urlfragment", Int.MaxValue), Gen.alphaNumChar)) 128 | refr_urlscheme <- Gen.option(strGen(FIELD_SIZES.getOrElse("refr_urlscheme", Int.MaxValue), Gen.alphaNumChar)) 129 | refr_urlhost <- Gen.option(strGen(FIELD_SIZES.getOrElse("refr_urlhost", Int.MaxValue), Gen.alphaNumChar)) 130 | refr_urlport <- Gen.option(Gen.chooseNum(1, 65000)) 131 | refr_urlpath <- Gen.option(strGen(FIELD_SIZES.getOrElse("refr_urlpath", Int.MaxValue), Gen.alphaNumChar)) 132 | refr_urlquery <- Gen.option(strGen(FIELD_SIZES.getOrElse("refr_urlquery", Int.MaxValue), Gen.alphaNumChar)) 133 | refr_urlfragment <- Gen.option(strGen(FIELD_SIZES.getOrElse("refr_urlfragment", Int.MaxValue), Gen.alphaNumChar)) 134 | refr_medium <- Gen.option(strGen(FIELD_SIZES.getOrElse("refr_medium", Int.MaxValue), Gen.alphaNumChar)) 135 | refr_source <- Gen.option(strGen(FIELD_SIZES.getOrElse("refr_source", Int.MaxValue), Gen.alphaNumChar)) 136 | refr_term <- Gen.option(strGen(FIELD_SIZES.getOrElse("refr_term", Int.MaxValue), Gen.alphaNumChar)) 137 | mkt_medium <- Gen.option(strGen(FIELD_SIZES.getOrElse("mkt_medium", Int.MaxValue), Gen.alphaNumChar)) 138 | mkt_source <- Gen.option(strGen(FIELD_SIZES.getOrElse("mkt_source", Int.MaxValue), Gen.alphaNumChar)) 139 | mkt_term <- 
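// Every Gen.option(...) field may be generated as None; toTsv writes None as
// an empty TSV column, and parsing maps the empty column back to None, which
// is what lets optional fields round-trip losslessly.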
Gen.option(strGen(FIELD_SIZES.getOrElse("mkt_term", Int.MaxValue), Gen.alphaNumChar)) 140 | mkt_content <- Gen.option(strGen(FIELD_SIZES.getOrElse("mkt_content", Int.MaxValue), Gen.alphaNumChar)) 141 | mkt_campaign <- Gen.option(strGen(FIELD_SIZES.getOrElse("mkt_campaign", Int.MaxValue), Gen.alphaNumChar)) 142 | contexts <- Gen.oneOf(contexts, Contexts(Nil)) 143 | se_category <- Gen.option(strGen(FIELD_SIZES.getOrElse("se_category", Int.MaxValue), Gen.alphaNumChar)) 144 | se_action <- Gen.option(strGen(FIELD_SIZES.getOrElse("se_action", Int.MaxValue), Gen.alphaNumChar)) 145 | se_label <- Gen.option(strGen(FIELD_SIZES.getOrElse("se_label", Int.MaxValue), Gen.alphaNumChar)) 146 | se_property <- Gen.option(strGen(FIELD_SIZES.getOrElse("se_property", Int.MaxValue), Gen.alphaNumChar)) 147 | se_value <- Gen.option(Arbitrary.arbitrary[Double]) 148 | unstruct_event = event match { 149 | case Some("unstruct") => unstruct 150 | case _ => UnstructEvent(None) 151 | } 152 | tr_orderid <- Gen.option(Gen.uuid).map(_.map(_.toString())) 153 | tr_affiliation <- Gen.option(strGen(FIELD_SIZES.getOrElse("tr_affiliation", Int.MaxValue), Gen.alphaNumChar)) 154 | tr_total <- Gen.option(Arbitrary.arbitrary[Double]) 155 | tr_tax <- Gen.option(Arbitrary.arbitrary[Double]) 156 | tr_shipping <- Gen.option(Arbitrary.arbitrary[Double]) 157 | tr_city <- Gen.option(strGen(FIELD_SIZES.getOrElse("tr_city", Int.MaxValue), Gen.alphaNumChar)) 158 | tr_state <- Gen.option(strGen(FIELD_SIZES.getOrElse("tr_state", Int.MaxValue), Gen.alphaNumChar)) 159 | tr_country <- Gen.option(strGen(FIELD_SIZES.getOrElse("tr_country", Int.MaxValue), Gen.alphaNumChar)) 160 | ti_orderid <- Gen.option(Gen.uuid).map(_.map(_.toString())) 161 | ti_sku <- Gen.option(strGen(FIELD_SIZES.getOrElse("ti_sku", Int.MaxValue), Gen.alphaNumChar)) 162 | ti_name <- Gen.option(strGen(FIELD_SIZES.getOrElse("ti_name", Int.MaxValue), Gen.alphaNumChar)) 163 | ti_category <- Gen.option(strGen(FIELD_SIZES.getOrElse("ti_category", Int.MaxValue), Gen.alphaNumChar)) 164 | ti_price <- Gen.option(Arbitrary.arbitrary[Double]) 165 | ti_quantity <- Gen.option(Gen.chooseNum(1, 100)) 166 | pp_xoffset_min <- Gen.option(Gen.chooseNum(1, 10000)) 167 | pp_xoffset_max <- Gen.option(Gen.chooseNum(1, 10000)) 168 | pp_yoffset_min <- Gen.option(Gen.chooseNum(1, 10000)) 169 | pp_yoffset_max <- Gen.option(Gen.chooseNum(1, 10000)) 170 | useragent <- Gen.option(strGen(FIELD_SIZES.getOrElse("useragent", Int.MaxValue), Gen.alphaNumChar)) 171 | br_name <- Gen.option(strGen(FIELD_SIZES.getOrElse("br_name", Int.MaxValue), Gen.alphaNumChar)) 172 | br_family <- Gen.option(strGen(FIELD_SIZES.getOrElse("br_family", Int.MaxValue), Gen.alphaNumChar)) 173 | br_version <- Gen.option(strGen(FIELD_SIZES.getOrElse("br_version", Int.MaxValue), Gen.alphaNumChar)) 174 | br_type <- Gen.option(strGen(FIELD_SIZES.getOrElse("br_type", Int.MaxValue), Gen.alphaNumChar)) 175 | br_renderengine <- Gen.option(strGen(FIELD_SIZES.getOrElse("br_renderengine", Int.MaxValue), Gen.alphaNumChar)) 176 | br_lang <- Gen.option(strGen(FIELD_SIZES.getOrElse("br_lang", Int.MaxValue), Gen.alphaNumChar)) 177 | br_features_pdf <- Gen.option(Arbitrary.arbitrary[Boolean]) 178 | br_features_flash <- Gen.option(Arbitrary.arbitrary[Boolean]) 179 | br_features_java <- Gen.option(Arbitrary.arbitrary[Boolean]) 180 | br_features_director <- Gen.option(Arbitrary.arbitrary[Boolean]) 181 | br_features_quicktime <- Gen.option(Arbitrary.arbitrary[Boolean]) 182 | br_features_realplayer <- Gen.option(Arbitrary.arbitrary[Boolean]) 183 | 
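// (Note the plain `=` binding for unstruct_event above: the link_click
// fixture is attached only when the generated event type is "unstruct";
// every other event type carries UnstructEvent(None).)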
br_features_windowsmedia <- Gen.option(Arbitrary.arbitrary[Boolean]) 184 | br_features_gears <- Gen.option(Arbitrary.arbitrary[Boolean]) 185 | br_features_silverlight <- Gen.option(Arbitrary.arbitrary[Boolean]) 186 | br_cookies <- Gen.option(Arbitrary.arbitrary[Boolean]) 187 | br_colordepth <- Gen.option(strGen(FIELD_SIZES.getOrElse("br_colordepth", Int.MaxValue), Gen.alphaNumChar)) 188 | br_viewwidth <- Gen.option(Gen.chooseNum(1, 10000)) 189 | br_viewheight <- Gen.option(Gen.chooseNum(1, 10000)) 190 | os_name <- Gen.option(strGen(FIELD_SIZES.getOrElse("os_name", Int.MaxValue), Gen.alphaNumChar)) 191 | os_family <- Gen.option(strGen(FIELD_SIZES.getOrElse("os_family", Int.MaxValue), Gen.alphaNumChar)) 192 | os_manufacturer <- Gen.option(strGen(FIELD_SIZES.getOrElse("os_manufacturer", Int.MaxValue), Gen.alphaNumChar)) 193 | os_timezone <- Gen.option(strGen(FIELD_SIZES.getOrElse("os_timezone", Int.MaxValue), Gen.alphaNumChar)) 194 | dvce_type <- Gen.option(strGen(FIELD_SIZES.getOrElse("dvce_type", Int.MaxValue), Gen.alphaNumChar)) 195 | dvce_ismobile <- Gen.option(Arbitrary.arbitrary[Boolean]) 196 | dvce_screenwidth <- Gen.option(Gen.chooseNum(1, 10000)) 197 | dvce_screenheight <- Gen.option(Gen.chooseNum(1, 10000)) 198 | doc_charset <- Gen.option(strGen(FIELD_SIZES.getOrElse("doc_charset", Int.MaxValue), Gen.alphaNumChar)) 199 | doc_width <- Gen.option(Gen.chooseNum(1, 10000)) 200 | doc_height <- Gen.option(Gen.chooseNum(1, 10000)) 201 | tr_currency <- Gen.option(strGen(FIELD_SIZES.getOrElse("tr_currency", Int.MaxValue), Gen.alphaNumChar)) 202 | tr_total_base <- Gen.option(Arbitrary.arbitrary[Double]) 203 | tr_tax_base <- Gen.option(Arbitrary.arbitrary[Double]) 204 | tr_shipping_base <- Gen.option(Arbitrary.arbitrary[Double]) 205 | ti_currency <- Gen.option(strGen(FIELD_SIZES.getOrElse("ti_currency", Int.MaxValue), Gen.alphaNumChar)) 206 | ti_price_base <- Gen.option(Arbitrary.arbitrary[Double]) 207 | base_currency <- Gen.option(strGen(FIELD_SIZES.getOrElse("base_currency", Int.MaxValue), Gen.alphaNumChar)) 208 | geo_timezone <- Gen.option(strGen(FIELD_SIZES.getOrElse("geo_timezone", Int.MaxValue), Gen.alphaNumChar)) 209 | mkt_clickid <- Gen.option(Gen.uuid).map(_.map(_.toString())) 210 | mkt_network <- Gen.option(strGen(FIELD_SIZES.getOrElse("mkt_network", Int.MaxValue), Gen.alphaNumChar)) 211 | etl_tags <- Gen.option(strGen(FIELD_SIZES.getOrElse("etl_tags", Int.MaxValue), Gen.alphaNumChar)) 212 | dvce_sent_tstamp <- Gen.option(instantGen) 213 | refr_domain_userid <- Gen.option(Gen.uuid).map(_.map(_.toString())) 214 | refr_dvce_tstamp <- Gen.option(instantGen) 215 | derived_contexts <- Gen.oneOf(derived_contexts, Contexts(Nil)) 216 | domain_sessionid <- Gen.option(Gen.uuid).map(_.map(_.toString())) 217 | derived_tstamp <- Gen.option(instantGen) 218 | event_vendor <- Gen.option(Gen.identifier) 219 | event_name <- Gen.option(Gen.identifier) 220 | event_format <- Gen.option("jsonschema") 221 | event_version <- Gen.option(strGen(FIELD_SIZES.getOrElse("event_version", Int.MaxValue), Gen.alphaNumChar)) 222 | event_fingerprint <- Gen.option(strGen(FIELD_SIZES.getOrElse("event_fingerprint", Int.MaxValue), Gen.alphaNumChar)) 223 | true_tstamp <- Gen.option(instantGen) 224 | } yield Event( 225 | app_id, 226 | platform, 227 | etl_tstamp, 228 | collector_tstamp, 229 | dvce_created_tstamp, 230 | event, 231 | event_id, 232 | txn_id, 233 | name_tracker, 234 | v_tracker, 235 | v_collector, 236 | v_etl, 237 | user_id, 238 | user_ipaddress, 239 | user_fingerprint, 240 | domain_userid, 241 | 
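// (The arguments below are positional, so they must stay in exactly the
// order of the Event case class fields, which in turn mirrors the
// enriched-event TSV column order that the parser expects.)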
domain_sessionidx, 242 | network_userid, 243 | geo_country, 244 | geo_region, 245 | geo_city, 246 | geo_zipcode, 247 | geo_latitude, 248 | geo_longitude, 249 | geo_region_name, 250 | ip_isp, 251 | ip_organization, 252 | ip_domain, 253 | ip_netspeed, 254 | page_url, 255 | page_title, 256 | page_referrer, 257 | page_urlscheme, 258 | page_urlhost, 259 | page_urlport, 260 | page_urlpath, 261 | page_urlquery, 262 | page_urlfragment, 263 | refr_urlscheme, 264 | refr_urlhost, 265 | refr_urlport, 266 | refr_urlpath, 267 | refr_urlquery, 268 | refr_urlfragment, 269 | refr_medium, 270 | refr_source, 271 | refr_term, 272 | mkt_medium, 273 | mkt_source, 274 | mkt_term, 275 | mkt_content, 276 | mkt_campaign, 277 | contexts, 278 | se_category, 279 | se_action, 280 | se_label, 281 | se_property, 282 | se_value, 283 | unstruct_event, 284 | tr_orderid, 285 | tr_affiliation, 286 | tr_total, 287 | tr_tax, 288 | tr_shipping, 289 | tr_city, 290 | tr_state, 291 | tr_country, 292 | ti_orderid, 293 | ti_sku, 294 | ti_name, 295 | ti_category, 296 | ti_price, 297 | ti_quantity, 298 | pp_xoffset_min, 299 | pp_xoffset_max, 300 | pp_yoffset_min, 301 | pp_yoffset_max, 302 | useragent, 303 | br_name, 304 | br_family, 305 | br_version, 306 | br_type, 307 | br_renderengine, 308 | br_lang, 309 | br_features_pdf, 310 | br_features_flash, 311 | br_features_java, 312 | br_features_director, 313 | br_features_quicktime, 314 | br_features_realplayer, 315 | br_features_windowsmedia, 316 | br_features_gears, 317 | br_features_silverlight, 318 | br_cookies, 319 | br_colordepth, 320 | br_viewwidth, 321 | br_viewheight, 322 | os_name, 323 | os_family, 324 | os_manufacturer, 325 | os_timezone, 326 | dvce_type, 327 | dvce_ismobile, 328 | dvce_screenwidth, 329 | dvce_screenheight, 330 | doc_charset, 331 | doc_width, 332 | doc_height, 333 | tr_currency, 334 | tr_total_base, 335 | tr_tax_base, 336 | tr_shipping_base, 337 | ti_currency, 338 | ti_price_base, 339 | base_currency, 340 | geo_timezone, 341 | mkt_clickid, 342 | mkt_network, 343 | etl_tags, 344 | dvce_sent_tstamp, 345 | refr_domain_userid, 346 | refr_dvce_tstamp, 347 | derived_contexts, 348 | domain_sessionid, 349 | derived_tstamp, 350 | event_vendor, 351 | event_name, 352 | event_format, 353 | event_version, 354 | event_fingerprint, 355 | true_tstamp 356 | ) 357 | } 358 | -------------------------------------------------------------------------------- /src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | 14 | package com.snowplowanalytics.snowplow.analytics.scalasdk 15 | 16 | // java 17 | import java.time.Instant 18 | import java.util.UUID 19 | import java.nio.ByteBuffer 20 | import java.nio.charset.StandardCharsets 21 | 22 | // cats 23 | import cats.data.Validated.{Invalid, Valid} 24 | import cats.data.NonEmptyList 25 | import cats.syntax.either._ 26 | 27 | // circe 28 | import io.circe.{Decoder, Encoder, Json, JsonObject} 29 | import io.circe.syntax._ 30 | import io.circe.parser._ 31 | import io.circe.generic.semiauto._ 32 | 33 | // Specs2 34 | import org.specs2.mutable.Specification 35 | 36 | // Iglu 37 | import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} 38 | 39 | // ScalaCheck 40 | import org.specs2.ScalaCheck 41 | import org.scalacheck.Prop.forAll 42 | 43 | // This library 44 | import com.snowplowanalytics.snowplow.analytics.scalasdk.validate.FIELD_SIZES 45 | import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent._ 46 | import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError._ 47 | import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError.RowDecodingErrorInfo._ 48 | 49 | /** 50 | * Tests Event case class 51 | */ 52 | class EventSpec extends Specification with ScalaCheck { 53 | import EventSpec._ 54 | 55 | "The Event parser" should { 56 | "successfully convert a tab-separated pageview event string to an Event instance and JSON" in { 57 | 58 | val input = baseInput.map { 59 | case ("contexts", _) => ("contexts", contextsJson) 60 | case ("unstruct_event", _) => ("unstruct_event", unstructJson) 61 | case ("derived_contexts", _) => ("derived_contexts", derivedContextsJson) 62 | case other => other 63 | } 64 | 65 | val expected = baseExpected.copy( 66 | contexts = Contexts( 67 | List( 68 | SelfDescribingData( 69 | SchemaKey( 70 | "org.schema", 71 | "WebPage", 72 | "jsonschema", 73 | SchemaVer.Full(1, 0, 0) 74 | ), 75 | JsonObject( 76 | ("genre", "blog".asJson), 77 | ("inLanguage", "en-US".asJson), 78 | ("datePublished", "2014-11-06T00:00:00Z".asJson), 79 | ("author", "Fred Blundun".asJson), 80 | ("breadcrumb", List("blog", "releases").asJson), 81 | ("keywords", List("snowplow", "javascript", "tracker", "event").asJson) 82 | ).asJson 83 | ), 84 | SelfDescribingData( 85 | SchemaKey( 86 | "org.w3", 87 | "PerformanceTiming", 88 | "jsonschema", 89 | SchemaVer.Full(1, 0, 0) 90 | ), 91 | JsonObject( 92 | ("navigationStart", 1415358089861L.asJson), 93 | ("unloadEventStart", 1415358090270L.asJson), 94 | ("unloadEventEnd", 1415358090287L.asJson), 95 | ("redirectStart", 0.asJson), 96 | ("redirectEnd", 0.asJson), 97 | ("fetchStart", 1415358089870L.asJson), 98 | ("domainLookupStart", 1415358090102L.asJson), 99 | ("domainLookupEnd", 1415358090102L.asJson), 100 | ("connectStart", 1415358090103L.asJson), 101 | ("connectEnd", 1415358090183L.asJson), 102 | ("requestStart", 1415358090183L.asJson), 103 | ("responseStart", 1415358090265L.asJson), 104 | ("responseEnd", 1415358090265L.asJson), 105 | ("domLoading", 1415358090270L.asJson), 106 | ("domInteractive", 1415358090886L.asJson), 107 | ("domContentLoadedEventStart", 1415358090968L.asJson), 108 | ("domContentLoadedEventEnd", 1415358091309L.asJson), 109 | ("domComplete", 0.asJson), 110 | ("loadEventStart", 0.asJson), 111 | ("loadEventEnd", 0.asJson) 112 | ).asJson 113 | ) 114 | ) 115 | ), 116 | unstruct_event = UnstructEvent( 117 | Some( 118 | SelfDescribingData( 119 | SchemaKey( 120 | "com.snowplowanalytics.snowplow", 121 | "link_click", 122 | "jsonschema", 123 | 
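// version 1-0-1, matching the iglu URI in unstructJson at the bottom of this file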
SchemaVer.Full(1, 0, 1) 124 | ), 125 | JsonObject( 126 | ("targetUrl", "http://www.example.com".asJson), 127 | ("elementClasses", List("foreground").asJson), 128 | ("elementId", "exampleLink".asJson) 129 | ).asJson 130 | ) 131 | ) 132 | ), 133 | derived_contexts = Contexts( 134 | List( 135 | SelfDescribingData( 136 | SchemaKey( 137 | "com.snowplowanalytics.snowplow", 138 | "ua_parser_context", 139 | "jsonschema", 140 | SchemaVer.Full(1, 0, 0) 141 | ), 142 | JsonObject( 143 | ("useragentFamily", "IE".asJson), 144 | ("useragentMajor", "7".asJson), 145 | ("useragentMinor", "0".asJson), 146 | ("useragentPatch", Json.Null), 147 | ("useragentVersion", "IE 7.0".asJson), 148 | ("osFamily", "Windows XP".asJson), 149 | ("osMajor", Json.Null), 150 | ("osMinor", Json.Null), 151 | ("osPatch", Json.Null), 152 | ("osPatchMinor", Json.Null), 153 | ("osVersion", "Windows XP".asJson), 154 | ("deviceFamily", "Other".asJson) 155 | ).asJson 156 | ) 157 | ) 158 | ) 159 | ) 160 | 161 | val eventValues = input.unzip._2.mkString("\t") 162 | val eventValuesBytes = ByteBuffer.wrap(eventValues.getBytes(StandardCharsets.UTF_8)) 163 | 164 | val event1 = Event.parse(eventValues) 165 | val event2 = Event.parser().parse(eventValues) 166 | val event3 = Event.parser(FIELD_SIZES).parse(eventValues) 167 | val event4 = Event.parseBytes(eventValuesBytes) 168 | val event5 = Event.parser().parseBytes(eventValuesBytes) 169 | val event6 = Event.parser(FIELD_SIZES).parseBytes(eventValuesBytes) 170 | 171 | // Case class must be processed as expected, for all varieties of the parser 172 | event1 mustEqual Valid(expected) 173 | event2 mustEqual Valid(expected) 174 | event3 mustEqual Valid(expected) 175 | event4 mustEqual Valid(expected) 176 | event5 mustEqual Valid(expected) 177 | event6 mustEqual Valid(expected) 178 | 179 | val eventJson = event1.getOrElse(throw new RuntimeException("Failed to parse event")).toJson(true) 180 | 181 | val expectedJson = parse("""{ 182 | "geo_location" : "37.443604,-122.4124", 183 | "app_id" : "angry-birds", 184 | "platform" : "web", 185 | "etl_tstamp" : "2017-01-26T00:01:25.292Z", 186 | "collector_tstamp" : "2013-11-26T00:02:05Z", 187 | "dvce_created_tstamp" : "2013-11-26T00:03:57.885Z", 188 | "event" : "page_view", 189 | "event_id" : "c6ef3124-b53a-4b13-a233-0088f79dcbcb", 190 | "txn_id" : 41828, 191 | "name_tracker" : "cloudfront-1", 192 | "v_tracker" : "js-2.1.0", 193 | "v_collector" : "clj-tomcat-0.1.0", 194 | "v_etl" : "serde-0.5.2", 195 | "user_id" : "jon.doe@email.com", 196 | "user_ipaddress" : "92.231.54.234", 197 | "user_fingerprint" : "2161814971", 198 | "domain_userid" : "bc2e92ec6c204a14", 199 | "domain_sessionidx" : 3, 200 | "network_userid" : "ecdff4d0-9175-40ac-a8bb-325c49733607", 201 | "geo_country" : "US", 202 | "geo_region" : "TX", 203 | "geo_city" : "New York", 204 | "geo_zipcode" : "94109", 205 | "geo_latitude" : 37.443604, 206 | "geo_longitude" : -122.4124, 207 | "geo_region_name" : "Florida", 208 | "ip_isp" : "FDN Communications", 209 | "ip_organization" : "Bouygues Telecom", 210 | "ip_domain" : "nuvox.net", 211 | "ip_netspeed" : "Cable/DSL", 212 | "page_url" : "http://www.snowplowanalytics.com", 213 | "page_title" : "On Analytics", 214 | "page_referrer" : null, 215 | "page_urlscheme" : "http", 216 | "page_urlhost" : "www.snowplowanalytics.com", 217 | "page_urlport" : 80, 218 | "page_urlpath" : "/product/index.html", 219 | "page_urlquery" : "id=GTM-DLRG", 220 | "page_urlfragment" : "4-conclusion", 221 | "refr_urlscheme" : null, 222 | "refr_urlhost" : null, 223 | "refr_urlport" : 
null, 224 | "refr_urlpath" : null, 225 | "refr_urlquery" : null, 226 | "refr_urlfragment" : null, 227 | "refr_medium" : null, 228 | "refr_source" : null, 229 | "refr_term" : null, 230 | "mkt_medium" : null, 231 | "mkt_source" : null, 232 | "mkt_term" : null, 233 | "mkt_content" : null, 234 | "mkt_campaign" : null, 235 | "contexts_org_schema_web_page_1" : [ { 236 | "_schema_version" : "1-0-0", 237 | "genre" : "blog", 238 | "inLanguage" : "en-US", 239 | "datePublished" : "2014-11-06T00:00:00Z", 240 | "author" : "Fred Blundun", 241 | "breadcrumb" : [ "blog", "releases" ], 242 | "keywords" : [ "snowplow", "javascript", "tracker", "event" ] 243 | } ], 244 | "contexts_org_w3_performance_timing_1" : [ { 245 | "_schema_version" : "1-0-0", 246 | "navigationStart" : 1415358089861, 247 | "unloadEventStart" : 1415358090270, 248 | "unloadEventEnd" : 1415358090287, 249 | "redirectStart" : 0, 250 | "redirectEnd" : 0, 251 | "fetchStart" : 1415358089870, 252 | "domainLookupStart" : 1415358090102, 253 | "domainLookupEnd" : 1415358090102, 254 | "connectStart" : 1415358090103, 255 | "connectEnd" : 1415358090183, 256 | "requestStart" : 1415358090183, 257 | "responseStart" : 1415358090265, 258 | "responseEnd" : 1415358090265, 259 | "domLoading" : 1415358090270, 260 | "domInteractive" : 1415358090886, 261 | "domContentLoadedEventStart" : 1415358090968, 262 | "domContentLoadedEventEnd" : 1415358091309, 263 | "domComplete" : 0, 264 | "loadEventStart" : 0, 265 | "loadEventEnd" : 0 266 | } ], 267 | "se_category" : null, 268 | "se_action" : null, 269 | "se_label" : null, 270 | "se_property" : null, 271 | "se_value" : null, 272 | "unstruct_event_com_snowplowanalytics_snowplow_link_click_1" : { 273 | "targetUrl" : "http://www.example.com", 274 | "elementClasses" : [ "foreground" ], 275 | "elementId" : "exampleLink" 276 | }, 277 | "tr_orderid" : null, 278 | "tr_affiliation" : null, 279 | "tr_total" : null, 280 | "tr_tax" : null, 281 | "tr_shipping" : null, 282 | "tr_city" : null, 283 | "tr_state" : null, 284 | "tr_country" : null, 285 | "ti_orderid" : null, 286 | "ti_sku" : null, 287 | "ti_name" : null, 288 | "ti_category" : null, 289 | "ti_price" : null, 290 | "ti_quantity" : null, 291 | "pp_xoffset_min" : null, 292 | "pp_xoffset_max" : null, 293 | "pp_yoffset_min" : null, 294 | "pp_yoffset_max" : null, 295 | "useragent" : null, 296 | "br_name" : null, 297 | "br_family" : null, 298 | "br_version" : null, 299 | "br_type" : null, 300 | "br_renderengine" : null, 301 | "br_lang" : null, 302 | "br_features_pdf" : true, 303 | "br_features_flash" : false, 304 | "br_features_java" : null, 305 | "br_features_director" : null, 306 | "br_features_quicktime" : null, 307 | "br_features_realplayer" : null, 308 | "br_features_windowsmedia" : null, 309 | "br_features_gears" : null, 310 | "br_features_silverlight" : null, 311 | "br_cookies" : null, 312 | "br_colordepth" : null, 313 | "br_viewwidth" : null, 314 | "br_viewheight" : null, 315 | "os_name" : null, 316 | "os_family" : null, 317 | "os_manufacturer" : null, 318 | "os_timezone" : null, 319 | "dvce_type" : null, 320 | "dvce_ismobile" : null, 321 | "dvce_screenwidth" : null, 322 | "dvce_screenheight" : null, 323 | "doc_charset" : null, 324 | "doc_width" : null, 325 | "doc_height" : null, 326 | "tr_currency" : null, 327 | "tr_total_base" : null, 328 | "tr_tax_base" : null, 329 | "tr_shipping_base" : null, 330 | "ti_currency" : null, 331 | "ti_price_base" : null, 332 | "base_currency" : null, 333 | "geo_timezone" : null, 334 | "mkt_clickid" : null, 335 | "mkt_network" : null, 336 | 
"etl_tags" : null, 337 | "dvce_sent_tstamp" : null, 338 | "refr_domain_userid" : null, 339 | "refr_dvce_tstamp" : null, 340 | "contexts_com_snowplowanalytics_snowplow_ua_parser_context_1": [{ 341 | "_schema_version" : "1-0-0", 342 | "useragentFamily": "IE", 343 | "useragentMajor": "7", 344 | "useragentMinor": "0", 345 | "useragentPatch": null, 346 | "useragentVersion": "IE 7.0", 347 | "osFamily": "Windows XP", 348 | "osMajor": null, 349 | "osMinor": null, 350 | "osPatch": null, 351 | "osPatchMinor": null, 352 | "osVersion": "Windows XP", 353 | "deviceFamily": "Other" 354 | }], 355 | "domain_sessionid": "2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1", 356 | "derived_tstamp": "2013-11-26T00:03:57.886Z", 357 | "event_vendor": "com.snowplowanalytics.snowplow", 358 | "event_name": "link_click", 359 | "event_format": "jsonschema", 360 | "event_version": "1-0-0", 361 | "event_fingerprint": "e3dbfa9cca0412c3d4052863cefb547f", 362 | "true_tstamp": "2013-11-26T00:03:57.886Z" 363 | }""").getOrElse(throw new RuntimeException("Failed to parse expected JSON")) 364 | 365 | // JSON output must be equal to output from the old transformer. (NB: field ordering in new JSON will be randomized) 366 | eventJson mustEqual expectedJson 367 | } 368 | 369 | "successfully convert a tab-separated pageview event string to an Event instance and JSON, omitting unstruct_event and contexts nullary fields" in { 370 | 371 | val input = baseInput 372 | val expected = baseExpected 373 | 374 | val eventValues = input.unzip._2.mkString("\t") 375 | val eventValuesBytes = ByteBuffer.wrap(eventValues.getBytes(StandardCharsets.UTF_8)) 376 | val event = Event.parse(eventValues) 377 | val event2 = Event.parseBytes(eventValuesBytes) 378 | 379 | // Case class must be processed as expected 380 | event mustEqual Valid(expected) 381 | event2 mustEqual Valid(expected) 382 | 383 | val eventJson = event.getOrElse(throw new RuntimeException("Failed to parse event")).toJson(true) 384 | 385 | val expectedJson = parse("""{ 386 | "geo_location" : "37.443604,-122.4124", 387 | "app_id" : "angry-birds", 388 | "platform" : "web", 389 | "etl_tstamp" : "2017-01-26T00:01:25.292Z", 390 | "collector_tstamp" : "2013-11-26T00:02:05Z", 391 | "dvce_created_tstamp" : "2013-11-26T00:03:57.885Z", 392 | "event" : "page_view", 393 | "event_id" : "c6ef3124-b53a-4b13-a233-0088f79dcbcb", 394 | "txn_id" : 41828, 395 | "name_tracker" : "cloudfront-1", 396 | "v_tracker" : "js-2.1.0", 397 | "v_collector" : "clj-tomcat-0.1.0", 398 | "v_etl" : "serde-0.5.2", 399 | "user_id" : "jon.doe@email.com", 400 | "user_ipaddress" : "92.231.54.234", 401 | "user_fingerprint" : "2161814971", 402 | "domain_userid" : "bc2e92ec6c204a14", 403 | "domain_sessionidx" : 3, 404 | "network_userid" : "ecdff4d0-9175-40ac-a8bb-325c49733607", 405 | "geo_country" : "US", 406 | "geo_region" : "TX", 407 | "geo_city" : "New York", 408 | "geo_zipcode" : "94109", 409 | "geo_latitude" : 37.443604, 410 | "geo_longitude" : -122.4124, 411 | "geo_region_name" : "Florida", 412 | "ip_isp" : "FDN Communications", 413 | "ip_organization" : "Bouygues Telecom", 414 | "ip_domain" : "nuvox.net", 415 | "ip_netspeed" : "Cable/DSL", 416 | "page_url" : "http://www.snowplowanalytics.com", 417 | "page_title" : "On Analytics", 418 | "page_referrer" : null, 419 | "page_urlscheme" : "http", 420 | "page_urlhost" : "www.snowplowanalytics.com", 421 | "page_urlport" : 80, 422 | "page_urlpath" : "/product/index.html", 423 | "page_urlquery" : "id=GTM-DLRG", 424 | "page_urlfragment" : "4-conclusion", 425 | "refr_urlscheme" : null, 426 | 
"refr_urlhost" : null, 427 | "refr_urlport" : null, 428 | "refr_urlpath" : null, 429 | "refr_urlquery" : null, 430 | "refr_urlfragment" : null, 431 | "refr_medium" : null, 432 | "refr_source" : null, 433 | "refr_term" : null, 434 | "mkt_medium" : null, 435 | "mkt_source" : null, 436 | "mkt_term" : null, 437 | "mkt_content" : null, 438 | "mkt_campaign" : null, 439 | "se_category" : null, 440 | "se_action" : null, 441 | "se_label" : null, 442 | "se_property" : null, 443 | "se_value" : null, 444 | "tr_orderid" : null, 445 | "tr_affiliation" : null, 446 | "tr_total" : null, 447 | "tr_tax" : null, 448 | "tr_shipping" : null, 449 | "tr_city" : null, 450 | "tr_state" : null, 451 | "tr_country" : null, 452 | "ti_orderid" : null, 453 | "ti_sku" : null, 454 | "ti_name" : null, 455 | "ti_category" : null, 456 | "ti_price" : null, 457 | "ti_quantity" : null, 458 | "pp_xoffset_min" : null, 459 | "pp_xoffset_max" : null, 460 | "pp_yoffset_min" : null, 461 | "pp_yoffset_max" : null, 462 | "useragent" : null, 463 | "br_name" : null, 464 | "br_family" : null, 465 | "br_version" : null, 466 | "br_type" : null, 467 | "br_renderengine" : null, 468 | "br_lang" : null, 469 | "br_features_pdf" : true, 470 | "br_features_flash" : false, 471 | "br_features_java" : null, 472 | "br_features_director" : null, 473 | "br_features_quicktime" : null, 474 | "br_features_realplayer" : null, 475 | "br_features_windowsmedia" : null, 476 | "br_features_gears" : null, 477 | "br_features_silverlight" : null, 478 | "br_cookies" : null, 479 | "br_colordepth" : null, 480 | "br_viewwidth" : null, 481 | "br_viewheight" : null, 482 | "os_name" : null, 483 | "os_family" : null, 484 | "os_manufacturer" : null, 485 | "os_timezone" : null, 486 | "dvce_type" : null, 487 | "dvce_ismobile" : null, 488 | "dvce_screenwidth" : null, 489 | "dvce_screenheight" : null, 490 | "doc_charset" : null, 491 | "doc_width" : null, 492 | "doc_height" : null, 493 | "tr_currency" : null, 494 | "tr_total_base" : null, 495 | "tr_tax_base" : null, 496 | "tr_shipping_base" : null, 497 | "ti_currency" : null, 498 | "ti_price_base" : null, 499 | "base_currency" : null, 500 | "geo_timezone" : null, 501 | "mkt_clickid" : null, 502 | "mkt_network" : null, 503 | "etl_tags" : null, 504 | "dvce_sent_tstamp" : null, 505 | "refr_domain_userid" : null, 506 | "refr_dvce_tstamp" : null, 507 | "domain_sessionid": "2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1", 508 | "derived_tstamp": "2013-11-26T00:03:57.886Z", 509 | "event_vendor": "com.snowplowanalytics.snowplow", 510 | "event_name": "link_click", 511 | "event_format": "jsonschema", 512 | "event_version": "1-0-0", 513 | "event_fingerprint": "e3dbfa9cca0412c3d4052863cefb547f", 514 | "true_tstamp": "2013-11-26T00:03:57.886Z" 515 | }""").getOrElse(throw new RuntimeException("Failed to parse expected JSON")) 516 | 517 | // JSON output must be equal to output from the old transformer. 
(NB: field ordering in new JSON will be randomized) 518 | eventJson mustEqual expectedJson 519 | } 520 | 521 | "successfully merge two matching contexts into 2-elements array" in { 522 | 523 | val input = baseInput.map { 524 | case ("contexts", _) => ("contexts", contextsWithDuplicate) 525 | case ("unstruct_event", _) => ("unstruct_event", unstructJson) 526 | case ("derived_contexts", _) => ("derived_contexts", derivedContextsJson) 527 | case other => other 528 | } 529 | 530 | val expected = baseExpected.copy( 531 | contexts = Contexts( 532 | List( 533 | SelfDescribingData( 534 | SchemaKey( 535 | "org.schema", 536 | "WebPage", 537 | "jsonschema", 538 | SchemaVer.Full(1, 0, 0) 539 | ), 540 | JsonObject( 541 | ("genre", "blog".asJson), 542 | ("inLanguage", "en-US".asJson), 543 | ("datePublished", "2014-11-06T00:00:00Z".asJson), 544 | ("author", "Fred Blundun".asJson), 545 | ("breadcrumb", List("blog", "releases").asJson), 546 | ("keywords", List("snowplow", "javascript", "tracker", "event").asJson) 547 | ).asJson 548 | ), 549 | SelfDescribingData( 550 | SchemaKey( 551 | "org.acme", 552 | "context_one", 553 | "jsonschema", 554 | SchemaVer.Full(1, 0, 0) 555 | ), 556 | JsonObject( 557 | ("item", 1.asJson) 558 | ).asJson 559 | ), 560 | SelfDescribingData( 561 | SchemaKey( 562 | "org.acme", 563 | "context_one", 564 | "jsonschema", 565 | SchemaVer.Full(1, 0, 1) 566 | ), 567 | JsonObject( 568 | ("item", 2.asJson) 569 | ).asJson 570 | ) 571 | ) 572 | ), 573 | unstruct_event = UnstructEvent( 574 | Some( 575 | SelfDescribingData( 576 | SchemaKey( 577 | "com.snowplowanalytics.snowplow", 578 | "link_click", 579 | "jsonschema", 580 | SchemaVer.Full(1, 0, 1) 581 | ), 582 | JsonObject( 583 | ("targetUrl", "http://www.example.com".asJson), 584 | ("elementClasses", List("foreground").asJson), 585 | ("elementId", "exampleLink".asJson) 586 | ).asJson 587 | ) 588 | ) 589 | ), 590 | derived_contexts = Contexts( 591 | List( 592 | SelfDescribingData( 593 | SchemaKey( 594 | "com.snowplowanalytics.snowplow", 595 | "ua_parser_context", 596 | "jsonschema", 597 | SchemaVer.Full(1, 0, 0) 598 | ), 599 | JsonObject( 600 | ("useragentFamily", "IE".asJson), 601 | ("useragentMajor", "7".asJson), 602 | ("useragentMinor", "0".asJson), 603 | ("useragentPatch", Json.Null), 604 | ("useragentVersion", "IE 7.0".asJson), 605 | ("osFamily", "Windows XP".asJson), 606 | ("osMajor", Json.Null), 607 | ("osMinor", Json.Null), 608 | ("osPatch", Json.Null), 609 | ("osPatchMinor", Json.Null), 610 | ("osVersion", "Windows XP".asJson), 611 | ("deviceFamily", "Other".asJson) 612 | ).asJson 613 | ) 614 | ) 615 | ) 616 | ) 617 | 618 | val eventValues = input.unzip._2.mkString("\t") 619 | val eventValuesBytes = ByteBuffer.wrap(eventValues.getBytes(StandardCharsets.UTF_8)) 620 | val event = Event.parse(eventValues) 621 | val event2 = Event.parseBytes(eventValuesBytes) 622 | 623 | // Case class must be processed as expected 624 | event mustEqual Valid(expected) 625 | event2 mustEqual Valid(expected) 626 | 627 | val eventJson = event.getOrElse(throw new RuntimeException("Failed to parse event")).toJson(true) 628 | 629 | val expectedJson = parse("""{ 630 | "geo_location" : "37.443604,-122.4124", 631 | "app_id" : "angry-birds", 632 | "platform" : "web", 633 | "etl_tstamp" : "2017-01-26T00:01:25.292Z", 634 | "collector_tstamp" : "2013-11-26T00:02:05Z", 635 | "dvce_created_tstamp" : "2013-11-26T00:03:57.885Z", 636 | "event" : "page_view", 637 | "event_id" : "c6ef3124-b53a-4b13-a233-0088f79dcbcb", 638 | "txn_id" : 41828, 639 | "name_tracker" : 
"cloudfront-1", 640 | "v_tracker" : "js-2.1.0", 641 | "v_collector" : "clj-tomcat-0.1.0", 642 | "v_etl" : "serde-0.5.2", 643 | "user_id" : "jon.doe@email.com", 644 | "user_ipaddress" : "92.231.54.234", 645 | "user_fingerprint" : "2161814971", 646 | "domain_userid" : "bc2e92ec6c204a14", 647 | "domain_sessionidx" : 3, 648 | "network_userid" : "ecdff4d0-9175-40ac-a8bb-325c49733607", 649 | "geo_country" : "US", 650 | "geo_region" : "TX", 651 | "geo_city" : "New York", 652 | "geo_zipcode" : "94109", 653 | "geo_latitude" : 37.443604, 654 | "geo_longitude" : -122.4124, 655 | "geo_region_name" : "Florida", 656 | "ip_isp" : "FDN Communications", 657 | "ip_organization" : "Bouygues Telecom", 658 | "ip_domain" : "nuvox.net", 659 | "ip_netspeed" : "Cable/DSL", 660 | "page_url" : "http://www.snowplowanalytics.com", 661 | "page_title" : "On Analytics", 662 | "page_referrer" : null, 663 | "page_urlscheme" : "http", 664 | "page_urlhost" : "www.snowplowanalytics.com", 665 | "page_urlport" : 80, 666 | "page_urlpath" : "/product/index.html", 667 | "page_urlquery" : "id=GTM-DLRG", 668 | "page_urlfragment" : "4-conclusion", 669 | "refr_urlscheme" : null, 670 | "refr_urlhost" : null, 671 | "refr_urlport" : null, 672 | "refr_urlpath" : null, 673 | "refr_urlquery" : null, 674 | "refr_urlfragment" : null, 675 | "refr_medium" : null, 676 | "refr_source" : null, 677 | "refr_term" : null, 678 | "mkt_medium" : null, 679 | "mkt_source" : null, 680 | "mkt_term" : null, 681 | "mkt_content" : null, 682 | "mkt_campaign" : null, 683 | "contexts_org_schema_web_page_1" : [ { 684 | "_schema_version" : "1-0-0", 685 | "genre" : "blog", 686 | "inLanguage" : "en-US", 687 | "datePublished" : "2014-11-06T00:00:00Z", 688 | "author" : "Fred Blundun", 689 | "breadcrumb" : [ "blog", "releases" ], 690 | "keywords" : [ "snowplow", "javascript", "tracker", "event" ] 691 | } ], 692 | "contexts_org_acme_context_one_1" : [ 693 | { 694 | "_schema_version" : "1-0-0", 695 | "item" : 1 696 | }, 697 | { 698 | "_schema_version" : "1-0-1", 699 | "item" : 2 700 | } 701 | ], 702 | "se_category" : null, 703 | "se_action" : null, 704 | "se_label" : null, 705 | "se_property" : null, 706 | "se_value" : null, 707 | "unstruct_event_com_snowplowanalytics_snowplow_link_click_1" : { 708 | "targetUrl" : "http://www.example.com", 709 | "elementClasses" : [ "foreground" ], 710 | "elementId" : "exampleLink" 711 | }, 712 | "tr_orderid" : null, 713 | "tr_affiliation" : null, 714 | "tr_total" : null, 715 | "tr_tax" : null, 716 | "tr_shipping" : null, 717 | "tr_city" : null, 718 | "tr_state" : null, 719 | "tr_country" : null, 720 | "ti_orderid" : null, 721 | "ti_sku" : null, 722 | "ti_name" : null, 723 | "ti_category" : null, 724 | "ti_price" : null, 725 | "ti_quantity" : null, 726 | "pp_xoffset_min" : null, 727 | "pp_xoffset_max" : null, 728 | "pp_yoffset_min" : null, 729 | "pp_yoffset_max" : null, 730 | "useragent" : null, 731 | "br_name" : null, 732 | "br_family" : null, 733 | "br_version" : null, 734 | "br_type" : null, 735 | "br_renderengine" : null, 736 | "br_lang" : null, 737 | "br_features_pdf" : true, 738 | "br_features_flash" : false, 739 | "br_features_java" : null, 740 | "br_features_director" : null, 741 | "br_features_quicktime" : null, 742 | "br_features_realplayer" : null, 743 | "br_features_windowsmedia" : null, 744 | "br_features_gears" : null, 745 | "br_features_silverlight" : null, 746 | "br_cookies" : null, 747 | "br_colordepth" : null, 748 | "br_viewwidth" : null, 749 | "br_viewheight" : null, 750 | "os_name" : null, 751 | "os_family" : null, 752 
| "os_manufacturer" : null, 753 | "os_timezone" : null, 754 | "dvce_type" : null, 755 | "dvce_ismobile" : null, 756 | "dvce_screenwidth" : null, 757 | "dvce_screenheight" : null, 758 | "doc_charset" : null, 759 | "doc_width" : null, 760 | "doc_height" : null, 761 | "tr_currency" : null, 762 | "tr_total_base" : null, 763 | "tr_tax_base" : null, 764 | "tr_shipping_base" : null, 765 | "ti_currency" : null, 766 | "ti_price_base" : null, 767 | "base_currency" : null, 768 | "geo_timezone" : null, 769 | "mkt_clickid" : null, 770 | "mkt_network" : null, 771 | "etl_tags" : null, 772 | "dvce_sent_tstamp" : null, 773 | "refr_domain_userid" : null, 774 | "refr_dvce_tstamp" : null, 775 | "contexts_com_snowplowanalytics_snowplow_ua_parser_context_1": [{ 776 | "_schema_version" : "1-0-0", 777 | "useragentFamily": "IE", 778 | "useragentMajor": "7", 779 | "useragentMinor": "0", 780 | "useragentPatch": null, 781 | "useragentVersion": "IE 7.0", 782 | "osFamily": "Windows XP", 783 | "osMajor": null, 784 | "osMinor": null, 785 | "osPatch": null, 786 | "osPatchMinor": null, 787 | "osVersion": "Windows XP", 788 | "deviceFamily": "Other" 789 | }], 790 | "domain_sessionid": "2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1", 791 | "derived_tstamp": "2013-11-26T00:03:57.886Z", 792 | "event_vendor": "com.snowplowanalytics.snowplow", 793 | "event_name": "link_click", 794 | "event_format": "jsonschema", 795 | "event_version": "1-0-0", 796 | "event_fingerprint": "e3dbfa9cca0412c3d4052863cefb547f", 797 | "true_tstamp": "2013-11-26T00:03:57.886Z" 798 | }""").getOrElse(throw new RuntimeException("Failed to parse expected JSON")) 799 | 800 | // JSON output must be equal to output from the old transformer. (NB: field ordering in new JSON will be randomized) 801 | eventJson mustEqual expectedJson 802 | } 803 | 804 | "return correct results from helper methods" in { 805 | val input = baseInput.map { 806 | case ("contexts", _) => ("contexts", contextsWithDuplicate) 807 | case ("unstruct_event", _) => ("unstruct_event", unstructJson) 808 | case ("derived_contexts", _) => ("derived_contexts", derivedContextsJson) 809 | case other => other 810 | } 811 | val eventValues = input.unzip._2.mkString("\t") 812 | val event = Event.parse(eventValues).getOrElse(throw new RuntimeException("Failed to parse event")) 813 | 814 | event.geoLocation must beSome(("geo_location", "37.443604,-122.4124".asJson)) 815 | event.contexts.toShreddedJson mustEqual Map( 816 | "contexts_org_schema_web_page_1" -> 817 | List( 818 | JsonObject( 819 | ("_schema_version", "1-0-0".asJson), 820 | ("genre", "blog".asJson), 821 | ("inLanguage", "en-US".asJson), 822 | ("datePublished", "2014-11-06T00:00:00Z".asJson), 823 | ("author", "Fred Blundun".asJson), 824 | ("breadcrumb", List("blog", "releases").asJson), 825 | ("keywords", List("snowplow", "javascript", "tracker", "event").asJson) 826 | ).asJson 827 | ).asJson, 828 | "contexts_org_acme_context_one_1" -> 829 | List( 830 | JsonObject( 831 | ("_schema_version", "1-0-0".asJson), 832 | ("item", 1.asJson) 833 | ).asJson, 834 | JsonObject( 835 | ("_schema_version", "1-0-1".asJson), 836 | ("item", 2.asJson) 837 | ).asJson 838 | ).asJson 839 | ) 840 | event.derived_contexts.toShreddedJson mustEqual Map( 841 | "contexts_com_snowplowanalytics_snowplow_ua_parser_context_1" -> 842 | List( 843 | JsonObject( 844 | ("_schema_version", "1-0-0".asJson), 845 | ("useragentFamily", "IE".asJson), 846 | ("useragentMajor", "7".asJson), 847 | ("useragentMinor", "0".asJson), 848 | ("useragentPatch", Json.Null), 849 | ("useragentVersion", "IE 
7.0".asJson), 850 | ("osFamily", "Windows XP".asJson), 851 | ("osMajor", Json.Null), 852 | ("osMinor", Json.Null), 853 | ("osPatch", Json.Null), 854 | ("osPatchMinor", Json.Null), 855 | ("osVersion", "Windows XP".asJson), 856 | ("deviceFamily", "Other".asJson) 857 | ).asJson 858 | ).asJson 859 | ) 860 | event.unstruct_event.toShreddedJson must beSome( 861 | "unstruct_event_com_snowplowanalytics_snowplow_link_click_1", 862 | JsonObject( 863 | ("targetUrl", "http://www.example.com".asJson), 864 | ("elementClasses", List("foreground").asJson), 865 | ("elementId", "exampleLink".asJson) 866 | ).asJson 867 | ) 868 | } 869 | 870 | "fail (and combine errors) if values are invalid" in { 871 | 872 | val input = baseInput.map { 873 | case ("etl_tstamp", _) => ("etl_tstamp" -> "not_an_instant") 874 | case ("collector_tstamp", _) => ("collector_tstamp" -> "") 875 | case ("event_id", _) => ("event_id" -> "not_a_uuid") 876 | case ("txn_id", _) => ("txn_id" -> "not_an_integer") 877 | case ("v_collector", _) => ("v_collector" -> "") 878 | case ("geo_latitude", _) => ("geo_latitude" -> "not_a_double") 879 | case ("br_features_pdf", _) => ("br_features_pdf" -> "not_a_boolean") 880 | case other => other 881 | } 882 | 883 | val eventValues = input.unzip._2.mkString("\t") 884 | val eventValuesBytes = ByteBuffer.wrap(eventValues.getBytes(StandardCharsets.UTF_8)) 885 | val event = Event.parse(eventValues) 886 | val event2 = Event.parseBytes(eventValuesBytes) 887 | 888 | // Case class must be correctly invalidated 889 | val res = RowDecodingError( 890 | NonEmptyList.of( 891 | InvalidValue(Symbol("etl_tstamp"), "not_an_instant", "Cannot parse key etl_tstamp into datetime"), 892 | InvalidValue(Symbol("collector_tstamp"), "", "Field collector_tstamp cannot be empty"), 893 | InvalidValue(Symbol("event_id"), "not_a_uuid", "Cannot parse key event_id into UUID"), 894 | InvalidValue(Symbol("txn_id"), "not_an_integer", "Cannot parse key txn_id into integer"), 895 | InvalidValue(Symbol("v_collector"), "", "Field v_collector cannot be empty"), 896 | InvalidValue(Symbol("geo_latitude"), "not_a_double", "Cannot parse key geo_latitude into double"), 897 | InvalidValue(Symbol("br_features_pdf"), "not_a_boolean", "Cannot parse key br_features_pdf into boolean") 898 | ) 899 | ) 900 | event mustEqual Invalid(res) 901 | event2 mustEqual Invalid(res) 902 | } 903 | 904 | "fail if payload is not TSV" in { 905 | val str = "non tsv" 906 | val bytes = ByteBuffer.wrap(str.getBytes(StandardCharsets.UTF_8)) 907 | val event = Event.parse(str) 908 | val event2 = Event.parseBytes(bytes) 909 | event mustEqual Invalid(NotTSV) 910 | event2 mustEqual Invalid(NotTSV) 911 | } 912 | 913 | "fail if there are more fields than expected" in { 914 | val input = baseInput :+ "additional_field" -> "mock_value" 915 | val eventValues = input.unzip._2.mkString("\t") 916 | val eventValuesBytes = ByteBuffer.wrap(eventValues.getBytes(StandardCharsets.UTF_8)) 917 | val event = Event.parse(eventValues) 918 | val event2 = Event.parseBytes(eventValuesBytes) 919 | 920 | event mustEqual Invalid(FieldNumberMismatch(132)) 921 | event2 mustEqual Invalid(FieldNumberMismatch(132)) 922 | } 923 | 924 | "fail if there are fewer fields than expected" in { 925 | val input = List( 926 | "app_id" -> "angry-birds", 927 | "platform" -> "web", 928 | "etl_tstamp" -> "not_an_instant", 929 | "collector_tstamp" -> "" 930 | ) 931 | 932 | val eventValues = input.unzip._2.mkString("\t") 933 | val eventValuesBytes = ByteBuffer.wrap(eventValues.getBytes(StandardCharsets.UTF_8)) 934 | val event 
= Event.parse(eventValues) 935 | val event2 = Event.parseBytes(eventValuesBytes) 936 | 937 | event mustEqual Invalid(FieldNumberMismatch(4)) 938 | event2 mustEqual Invalid(FieldNumberMismatch(4)) 939 | } 940 | 941 | "successfully decode encoded event which has no contexts or unstruct_event" in { 942 | val event = baseExpected 943 | val eventJsonStr = event.toJson(false).noSpaces 944 | val eventJson = parse(eventJsonStr).getOrElse(throw new RuntimeException("Error while converting to json")) 945 | eventJson.as[Event] must beRight(event) 946 | } 947 | 948 | "successfully decode encoded event which has contexts but has no unstruct_event" in { 949 | val event = baseExpected.copy( 950 | contexts = Contexts( 951 | List( 952 | SelfDescribingData( 953 | SchemaKey( 954 | "org.schema", 955 | "WebPage", 956 | "jsonschema", 957 | SchemaVer.Full(1, 0, 0) 958 | ), 959 | JsonObject( 960 | ("genre", "blog".asJson), 961 | ("inLanguage", "en-US".asJson), 962 | ("datePublished", "2014-11-06T00:00:00Z".asJson), 963 | ("author", "Fred Blundun".asJson), 964 | ("breadcrumb", List("blog", "releases").asJson), 965 | ("keywords", List("snowplow", "javascript", "tracker", "event").asJson) 966 | ).asJson 967 | ), 968 | SelfDescribingData( 969 | SchemaKey( 970 | "org.w3", 971 | "PerformanceTiming", 972 | "jsonschema", 973 | SchemaVer.Full(1, 0, 0) 974 | ), 975 | JsonObject( 976 | ("navigationStart", 1415358089861L.asJson), 977 | ("unloadEventStart", 1415358090270L.asJson), 978 | ("unloadEventEnd", 1415358090287L.asJson), 979 | ("redirectStart", 0.asJson), 980 | ("redirectEnd", 0.asJson), 981 | ("fetchStart", 1415358089870L.asJson), 982 | ("domainLookupStart", 1415358090102L.asJson), 983 | ("domainLookupEnd", 1415358090102L.asJson), 984 | ("connectStart", 1415358090103L.asJson), 985 | ("connectEnd", 1415358090183L.asJson), 986 | ("requestStart", 1415358090183L.asJson), 987 | ("responseStart", 1415358090265L.asJson), 988 | ("responseEnd", 1415358090265L.asJson), 989 | ("domLoading", 1415358090270L.asJson), 990 | ("domInteractive", 1415358090886L.asJson), 991 | ("domContentLoadedEventStart", 1415358090968L.asJson), 992 | ("domContentLoadedEventEnd", 1415358091309L.asJson), 993 | ("domComplete", 0.asJson), 994 | ("loadEventStart", 0.asJson), 995 | ("loadEventEnd", 0.asJson) 996 | ).asJson 997 | ) 998 | ) 999 | ), 1000 | derived_contexts = Contexts( 1001 | List( 1002 | SelfDescribingData( 1003 | SchemaKey( 1004 | "com.snowplowanalytics.snowplow", 1005 | "ua_parser_context", 1006 | "jsonschema", 1007 | SchemaVer.Full(1, 0, 0) 1008 | ), 1009 | JsonObject( 1010 | ("useragentFamily", "IE".asJson), 1011 | ("useragentMajor", "7".asJson), 1012 | ("useragentMinor", "0".asJson), 1013 | ("useragentPatch", Json.Null), 1014 | ("useragentVersion", "IE 7.0".asJson), 1015 | ("osFamily", "Windows XP".asJson), 1016 | ("osMajor", Json.Null), 1017 | ("osMinor", Json.Null), 1018 | ("osPatch", Json.Null), 1019 | ("osPatchMinor", Json.Null), 1020 | ("osVersion", "Windows XP".asJson), 1021 | ("deviceFamily", "Other".asJson) 1022 | ).asJson 1023 | ) 1024 | ) 1025 | ) 1026 | ) 1027 | val eventJson = event.toJson(false) 1028 | eventJson.as[Event] must beRight(event) 1029 | } 1030 | 1031 | "permissively decode an oversized event" in { 1032 | parse(s"""{ 1033 | "collector_tstamp" : "2021-12-06T15:47:07.920430Z", 1034 | "event_id" : "bbb05861-0f11-4986-b23b-87e6e22609be", 1035 | "v_collector" : "${"v" * 101}", 1036 | "v_etl" : "v_etl", 1037 | "contexts" : {}, 1038 | "unstruct_event": {}, 1039 | "derived_contexts" : {} 1040 | 
}""".stripMargin).getOrElse(throw new RuntimeException("Error while converting to json")).as[Event] must beRight( 1041 | Event 1042 | .minimal(UUID.fromString("bbb05861-0f11-4986-b23b-87e6e22609be"), 1043 | Instant.parse("2021-12-06T15:47:07.920430Z"), 1044 | "v" * 101, 1045 | "v_etl" 1046 | ) 1047 | ) 1048 | } 1049 | 1050 | "successfully decode encoded event which has unstruct_event but has no contexts" in { 1051 | val event = baseExpected.copy( 1052 | unstruct_event = UnstructEvent( 1053 | Some( 1054 | SelfDescribingData( 1055 | SchemaKey( 1056 | "com.snowplowanalytics.snowplow", 1057 | "link_click", 1058 | "jsonschema", 1059 | SchemaVer.Full(1, 0, 1) 1060 | ), 1061 | JsonObject( 1062 | ("targetUrl", "http://www.example.com".asJson), 1063 | ("elementClasses", List("foreground").asJson), 1064 | ("elementId", "exampleLink".asJson) 1065 | ).asJson 1066 | ) 1067 | ) 1068 | ) 1069 | ) 1070 | val eventJson = event.toJson(false) 1071 | eventJson.as[Event] must beRight(event) 1072 | } 1073 | 1074 | "successfully decode encoded event which has both contexts and unstruct_event" in { 1075 | val event = baseExpected.copy( 1076 | contexts = Contexts( 1077 | List( 1078 | SelfDescribingData( 1079 | SchemaKey( 1080 | "org.schema", 1081 | "WebPage", 1082 | "jsonschema", 1083 | SchemaVer.Full(1, 0, 0) 1084 | ), 1085 | JsonObject( 1086 | ("genre", "blog".asJson), 1087 | ("inLanguage", "en-US".asJson), 1088 | ("datePublished", "2014-11-06T00:00:00Z".asJson), 1089 | ("author", "Fred Blundun".asJson), 1090 | ("breadcrumb", List("blog", "releases").asJson), 1091 | ("keywords", List("snowplow", "javascript", "tracker", "event").asJson) 1092 | ).asJson 1093 | ), 1094 | SelfDescribingData( 1095 | SchemaKey( 1096 | "org.w3", 1097 | "PerformanceTiming", 1098 | "jsonschema", 1099 | SchemaVer.Full(1, 0, 0) 1100 | ), 1101 | JsonObject( 1102 | ("navigationStart", 1415358089861L.asJson), 1103 | ("unloadEventStart", 1415358090270L.asJson), 1104 | ("unloadEventEnd", 1415358090287L.asJson), 1105 | ("redirectStart", 0.asJson), 1106 | ("redirectEnd", 0.asJson), 1107 | ("fetchStart", 1415358089870L.asJson), 1108 | ("domainLookupStart", 1415358090102L.asJson), 1109 | ("domainLookupEnd", 1415358090102L.asJson), 1110 | ("connectStart", 1415358090103L.asJson), 1111 | ("connectEnd", 1415358090183L.asJson), 1112 | ("requestStart", 1415358090183L.asJson), 1113 | ("responseStart", 1415358090265L.asJson), 1114 | ("responseEnd", 1415358090265L.asJson), 1115 | ("domLoading", 1415358090270L.asJson), 1116 | ("domInteractive", 1415358090886L.asJson), 1117 | ("domContentLoadedEventStart", 1415358090968L.asJson), 1118 | ("domContentLoadedEventEnd", 1415358091309L.asJson), 1119 | ("domComplete", 0.asJson), 1120 | ("loadEventStart", 0.asJson), 1121 | ("loadEventEnd", 0.asJson) 1122 | ).asJson 1123 | ) 1124 | ) 1125 | ), 1126 | unstruct_event = UnstructEvent( 1127 | Some( 1128 | SelfDescribingData( 1129 | SchemaKey( 1130 | "com.snowplowanalytics.snowplow", 1131 | "link_click", 1132 | "jsonschema", 1133 | SchemaVer.Full(1, 0, 1) 1134 | ), 1135 | JsonObject( 1136 | ("targetUrl", "http://www.example.com".asJson), 1137 | ("elementClasses", List("foreground").asJson), 1138 | ("elementId", "exampleLink".asJson) 1139 | ).asJson 1140 | ) 1141 | ) 1142 | ), 1143 | derived_contexts = Contexts( 1144 | List( 1145 | SelfDescribingData( 1146 | SchemaKey( 1147 | "com.snowplowanalytics.snowplow", 1148 | "ua_parser_context", 1149 | "jsonschema", 1150 | SchemaVer.Full(1, 0, 0) 1151 | ), 1152 | JsonObject( 1153 | ("useragentFamily", "IE".asJson), 1154 | 
("useragentMajor", "7".asJson), 1155 | ("useragentMinor", "0".asJson), 1156 | ("useragentPatch", Json.Null), 1157 | ("useragentVersion", "IE 7.0".asJson), 1158 | ("osFamily", "Windows XP".asJson), 1159 | ("osMajor", Json.Null), 1160 | ("osMinor", Json.Null), 1161 | ("osPatch", Json.Null), 1162 | ("osPatchMinor", Json.Null), 1163 | ("osVersion", "Windows XP".asJson), 1164 | ("deviceFamily", "Other".asJson) 1165 | ).asJson 1166 | ) 1167 | ) 1168 | ) 1169 | ) 1170 | val eventJson = event.toJson(false) 1171 | eventJson.as[Event] must beRight(event) 1172 | } 1173 | 1174 | "successfully decode object with event which has no contexts or unstruct_event" in { 1175 | case class Temp(event: Event) 1176 | implicit val tempClassJsonEncoder: Encoder[Temp] = deriveEncoder 1177 | implicit val tempClassJsonDecoder: Decoder[Temp] = deriveDecoder 1178 | val event = baseExpected 1179 | val tempInstance = Temp(event) 1180 | val tempJsonStr = tempInstance.asJson.noSpaces 1181 | val tempJson = parse(tempJsonStr).getOrElse(throw new RuntimeException("Error while converting to json")) 1182 | tempJson.as[Temp].map(_.event) must beRight(event) 1183 | } 1184 | 1185 | "optionally truncate events with oversized fields" in { 1186 | 1187 | val input = baseInput.map { 1188 | case ("app_id", _) => ("app_id", "x" * 256) 1189 | case other => other 1190 | } 1191 | 1192 | val expected1 = baseExpected.copy( 1193 | app_id = Some("x" * 256) 1194 | ) 1195 | 1196 | val expected2 = baseExpected.copy( 1197 | app_id = Some("x" * 10) 1198 | ) 1199 | 1200 | val eventValues = input.unzip._2.mkString("\t") 1201 | 1202 | Event.parse(eventValues) mustEqual Valid(expected1) 1203 | Event.parser().parse(eventValues) mustEqual Valid(expected1) 1204 | Event.parser(Map("app_id" -> 10)).parse(eventValues) mustEqual Valid(expected2) 1205 | } 1206 | } 1207 | 1208 | "The transformSchema method" should { 1209 | "successfully convert schemas into snake_case" in { 1210 | SnowplowEvent.transformSchema(Data.Contexts(Data.CustomContexts), 1211 | "org.w3", 1212 | "PerformanceTiming", 1213 | 1 1214 | ) mustEqual "contexts_org_w3_performance_timing_1" 1215 | SnowplowEvent.transformSchema(Data.Contexts(Data.CustomContexts), 1216 | SchemaKey("org.w3", "PerformanceTiming", "jsonschema", SchemaVer.Full(1, 0, 0)) 1217 | ) mustEqual "contexts_org_w3_performance_timing_1" 1218 | SnowplowEvent.transformSchema(Data.Contexts(Data.CustomContexts), 1219 | "com.snowplowanalytics.snowplow", 1220 | "ua_parser_context", 1221 | 1 1222 | ) mustEqual "contexts_com_snowplowanalytics_snowplow_ua_parser_context_1" 1223 | SnowplowEvent.transformSchema(Data.UnstructEvent, 1224 | "com.snowplowanalytics.self-desc", 1225 | "schema", 1226 | 1 1227 | ) mustEqual "unstruct_event_com_snowplowanalytics_self_desc_schema_1" 1228 | } 1229 | } 1230 | 1231 | "Parsing the result of toTSV should produce the same event" in { 1232 | forAll(EventGen.event) { e => 1233 | Event.parse(e.toTsv) mustEqual (Valid(e)) 1234 | } 1235 | } 1236 | } 1237 | 1238 | object EventSpec { 1239 | val unstructJson = 1240 | """{ 1241 | "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", 1242 | "data": { 1243 | "schema": "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", 1244 | "data": { 1245 | "targetUrl": "http://www.example.com", 1246 | "elementClasses": ["foreground"], 1247 | "elementId": "exampleLink" 1248 | } 1249 | } 1250 | }""" 1251 | 1252 | val contextsJson = 1253 | """{ 1254 | "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", 1255 | "data": [ 
1256 | { 1257 | "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", 1258 | "data": { 1259 | "genre": "blog", 1260 | "inLanguage": "en-US", 1261 | "datePublished": "2014-11-06T00:00:00Z", 1262 | "author": "Fred Blundun", 1263 | "breadcrumb": [ 1264 | "blog", 1265 | "releases" 1266 | ], 1267 | "keywords": [ 1268 | "snowplow", 1269 | "javascript", 1270 | "tracker", 1271 | "event" 1272 | ] 1273 | } 1274 | }, 1275 | { 1276 | "schema": "iglu:org.w3/PerformanceTiming/jsonschema/1-0-0", 1277 | "data": { 1278 | "navigationStart": 1415358089861, 1279 | "unloadEventStart": 1415358090270, 1280 | "unloadEventEnd": 1415358090287, 1281 | "redirectStart": 0, 1282 | "redirectEnd": 0, 1283 | "fetchStart": 1415358089870, 1284 | "domainLookupStart": 1415358090102, 1285 | "domainLookupEnd": 1415358090102, 1286 | "connectStart": 1415358090103, 1287 | "connectEnd": 1415358090183, 1288 | "requestStart": 1415358090183, 1289 | "responseStart": 1415358090265, 1290 | "responseEnd": 1415358090265, 1291 | "domLoading": 1415358090270, 1292 | "domInteractive": 1415358090886, 1293 | "domContentLoadedEventStart": 1415358090968, 1294 | "domContentLoadedEventEnd": 1415358091309, 1295 | "domComplete": 0, 1296 | "loadEventStart": 0, 1297 | "loadEventEnd": 0 1298 | } 1299 | } 1300 | ] 1301 | }""" 1302 | 1303 | val contextsWithDuplicate = """{ 1304 | "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", 1305 | "data": [ 1306 | { 1307 | "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", 1308 | "data": { 1309 | "genre": "blog", 1310 | "inLanguage": "en-US", 1311 | "datePublished": "2014-11-06T00:00:00Z", 1312 | "author": "Fred Blundun", 1313 | "breadcrumb": [ 1314 | "blog", 1315 | "releases" 1316 | ], 1317 | "keywords": [ 1318 | "snowplow", 1319 | "javascript", 1320 | "tracker", 1321 | "event" 1322 | ] 1323 | } 1324 | }, 1325 | { 1326 | "schema": "iglu:org.acme/context_one/jsonschema/1-0-0", 1327 | "data": { 1328 | "item": 1 1329 | } 1330 | }, 1331 | { 1332 | "schema": "iglu:org.acme/context_one/jsonschema/1-0-1", 1333 | "data": { 1334 | "item": 2 1335 | } 1336 | } 1337 | ] 1338 | }""" 1339 | 1340 | val derivedContextsJson = 1341 | """{ 1342 | "schema": "iglu:com.snowplowanalytics.snowplow\/contexts\/jsonschema\/1-0-1", 1343 | "data": [ 1344 | { 1345 | "schema": "iglu:com.snowplowanalytics.snowplow\/ua_parser_context\/jsonschema\/1-0-0", 1346 | "data": { 1347 | "useragentFamily": "IE", 1348 | "useragentMajor": "7", 1349 | "useragentMinor": "0", 1350 | "useragentPatch": null, 1351 | "useragentVersion": "IE 7.0", 1352 | "osFamily": "Windows XP", 1353 | "osMajor": null, 1354 | "osMinor": null, 1355 | "osPatch": null, 1356 | "osPatchMinor": null, 1357 | "osVersion": "Windows XP", 1358 | "deviceFamily": "Other" 1359 | } 1360 | } 1361 | ] 1362 | }""" 1363 | 1364 | val baseInput = List( 1365 | "app_id" -> "angry-birds", 1366 | "platform" -> "web", 1367 | "etl_tstamp" -> "2017-01-26 00:01:25.292", 1368 | "collector_tstamp" -> "2013-11-26 00:02:05", 1369 | "dvce_created_tstamp" -> "2013-11-26 00:03:57.885", 1370 | "event" -> "page_view", 1371 | "event_id" -> "c6ef3124-b53a-4b13-a233-0088f79dcbcb", 1372 | "txn_id" -> "41828", 1373 | "name_tracker" -> "cloudfront-1", 1374 | "v_tracker" -> "js-2.1.0", 1375 | "v_collector" -> "clj-tomcat-0.1.0", 1376 | "v_etl" -> "serde-0.5.2", 1377 | "user_id" -> "jon.doe@email.com", 1378 | "user_ipaddress" -> "92.231.54.234", 1379 | "user_fingerprint" -> "2161814971", 1380 | "domain_userid" -> "bc2e92ec6c204a14", 1381 | "domain_sessionidx" -> "3", 1382 | "network_userid" -> 
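// (baseInput must supply every enriched-event TSV column in order; the
// "fail if there are more fields than expected" spec sees 132 fields after
// appending one extra, so this base list has 131 entries.)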
"ecdff4d0-9175-40ac-a8bb-325c49733607", 1383 | "geo_country" -> "US", 1384 | "geo_region" -> "TX", 1385 | "geo_city" -> "New York", 1386 | "geo_zipcode" -> "94109", 1387 | "geo_latitude" -> "37.443604", 1388 | "geo_longitude" -> "-122.4124", 1389 | "geo_region_name" -> "Florida", 1390 | "ip_isp" -> "FDN Communications", 1391 | "ip_organization" -> "Bouygues Telecom", 1392 | "ip_domain" -> "nuvox.net", 1393 | "ip_netspeed" -> "Cable/DSL", 1394 | "page_url" -> "http://www.snowplowanalytics.com", 1395 | "page_title" -> "On Analytics", 1396 | "page_referrer" -> "", 1397 | "page_urlscheme" -> "http", 1398 | "page_urlhost" -> "www.snowplowanalytics.com", 1399 | "page_urlport" -> "80", 1400 | "page_urlpath" -> "/product/index.html", 1401 | "page_urlquery" -> "id=GTM-DLRG", 1402 | "page_urlfragment" -> "4-conclusion", 1403 | "refr_urlscheme" -> "", 1404 | "refr_urlhost" -> "", 1405 | "refr_urlport" -> "", 1406 | "refr_urlpath" -> "", 1407 | "refr_urlquery" -> "", 1408 | "refr_urlfragment" -> "", 1409 | "refr_medium" -> "", 1410 | "refr_source" -> "", 1411 | "refr_term" -> "", 1412 | "mkt_medium" -> "", 1413 | "mkt_source" -> "", 1414 | "mkt_term" -> "", 1415 | "mkt_content" -> "", 1416 | "mkt_campaign" -> "", 1417 | "contexts" -> "", 1418 | "se_category" -> "", 1419 | "se_action" -> "", 1420 | "se_label" -> "", 1421 | "se_property" -> "", 1422 | "se_value" -> "", 1423 | "unstruct_event" -> "", 1424 | "tr_orderid" -> "", 1425 | "tr_affiliation" -> "", 1426 | "tr_total" -> "", 1427 | "tr_tax" -> "", 1428 | "tr_shipping" -> "", 1429 | "tr_city" -> "", 1430 | "tr_state" -> "", 1431 | "tr_country" -> "", 1432 | "ti_orderid" -> "", 1433 | "ti_sku" -> "", 1434 | "ti_name" -> "", 1435 | "ti_category" -> "", 1436 | "ti_price" -> "", 1437 | "ti_quantity" -> "", 1438 | "pp_xoffset_min" -> "", 1439 | "pp_xoffset_max" -> "", 1440 | "pp_yoffset_min" -> "", 1441 | "pp_yoffset_max" -> "", 1442 | "useragent" -> "", 1443 | "br_name" -> "", 1444 | "br_family" -> "", 1445 | "br_version" -> "", 1446 | "br_type" -> "", 1447 | "br_renderengine" -> "", 1448 | "br_lang" -> "", 1449 | "br_features_pdf" -> "1", 1450 | "br_features_flash" -> "0", 1451 | "br_features_java" -> "", 1452 | "br_features_director" -> "", 1453 | "br_features_quicktime" -> "", 1454 | "br_features_realplayer" -> "", 1455 | "br_features_windowsmedia" -> "", 1456 | "br_features_gears" -> "", 1457 | "br_features_silverlight" -> "", 1458 | "br_cookies" -> "", 1459 | "br_colordepth" -> "", 1460 | "br_viewwidth" -> "", 1461 | "br_viewheight" -> "", 1462 | "os_name" -> "", 1463 | "os_family" -> "", 1464 | "os_manufacturer" -> "", 1465 | "os_timezone" -> "", 1466 | "dvce_type" -> "", 1467 | "dvce_ismobile" -> "", 1468 | "dvce_screenwidth" -> "", 1469 | "dvce_screenheight" -> "", 1470 | "doc_charset" -> "", 1471 | "doc_width" -> "", 1472 | "doc_height" -> "", 1473 | "tr_currency" -> "", 1474 | "tr_total_base" -> "", 1475 | "tr_tax_base" -> "", 1476 | "tr_shipping_base" -> "", 1477 | "ti_currency" -> "", 1478 | "ti_price_base" -> "", 1479 | "base_currency" -> "", 1480 | "geo_timezone" -> "", 1481 | "mkt_clickid" -> "", 1482 | "mkt_network" -> "", 1483 | "etl_tags" -> "", 1484 | "dvce_sent_tstamp" -> "", 1485 | "refr_domain_userid" -> "", 1486 | "refr_dvce_tstamp" -> "", 1487 | "derived_contexts" -> "", 1488 | "domain_sessionid" -> "2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1", 1489 | "derived_tstamp" -> "2013-11-26 00:03:57.886", 1490 | "event_vendor" -> "com.snowplowanalytics.snowplow", 1491 | "event_name" -> "link_click", 1492 | "event_format" -> "jsonschema", 1493 
| "event_version" -> "1-0-0", 1494 | "event_fingerprint" -> "e3dbfa9cca0412c3d4052863cefb547f", 1495 | "true_tstamp" -> "2013-11-26 00:03:57.886" 1496 | ) 1497 | 1498 | val baseExpected = Event( 1499 | app_id = Some("angry-birds"), 1500 | platform = Some("web"), 1501 | etl_tstamp = Some(Instant.parse("2017-01-26T00:01:25.292Z")), 1502 | collector_tstamp = Instant.parse("2013-11-26T00:02:05Z"), 1503 | dvce_created_tstamp = Some(Instant.parse("2013-11-26T00:03:57.885Z")), 1504 | event = Some("page_view"), 1505 | event_id = UUID.fromString("c6ef3124-b53a-4b13-a233-0088f79dcbcb"), 1506 | txn_id = Some(41828), 1507 | name_tracker = Some("cloudfront-1"), 1508 | v_tracker = Some("js-2.1.0"), 1509 | v_collector = "clj-tomcat-0.1.0", 1510 | v_etl = "serde-0.5.2", 1511 | user_id = Some("jon.doe@email.com"), 1512 | user_ipaddress = Some("92.231.54.234"), 1513 | user_fingerprint = Some("2161814971"), 1514 | domain_userid = Some("bc2e92ec6c204a14"), 1515 | domain_sessionidx = Some(3), 1516 | network_userid = Some("ecdff4d0-9175-40ac-a8bb-325c49733607"), 1517 | geo_country = Some("US"), 1518 | geo_region = Some("TX"), 1519 | geo_city = Some("New York"), 1520 | geo_zipcode = Some("94109"), 1521 | geo_latitude = Some(37.443604), 1522 | geo_longitude = Some(-122.4124), 1523 | geo_region_name = Some("Florida"), 1524 | ip_isp = Some("FDN Communications"), 1525 | ip_organization = Some("Bouygues Telecom"), 1526 | ip_domain = Some("nuvox.net"), 1527 | ip_netspeed = Some("Cable/DSL"), 1528 | page_url = Some("http://www.snowplowanalytics.com"), 1529 | page_title = Some("On Analytics"), 1530 | page_referrer = None, 1531 | page_urlscheme = Some("http"), 1532 | page_urlhost = Some("www.snowplowanalytics.com"), 1533 | page_urlport = Some(80), 1534 | page_urlpath = Some("/product/index.html"), 1535 | page_urlquery = Some("id=GTM-DLRG"), 1536 | page_urlfragment = Some("4-conclusion"), 1537 | refr_urlscheme = None, 1538 | refr_urlhost = None, 1539 | refr_urlport = None, 1540 | refr_urlpath = None, 1541 | refr_urlquery = None, 1542 | refr_urlfragment = None, 1543 | refr_medium = None, 1544 | refr_source = None, 1545 | refr_term = None, 1546 | mkt_medium = None, 1547 | mkt_source = None, 1548 | mkt_term = None, 1549 | mkt_content = None, 1550 | mkt_campaign = None, 1551 | contexts = Contexts(List()), 1552 | se_category = None, 1553 | se_action = None, 1554 | se_label = None, 1555 | se_property = None, 1556 | se_value = None, 1557 | unstruct_event = UnstructEvent(None), 1558 | tr_orderid = None, 1559 | tr_affiliation = None, 1560 | tr_total = None, 1561 | tr_tax = None, 1562 | tr_shipping = None, 1563 | tr_city = None, 1564 | tr_state = None, 1565 | tr_country = None, 1566 | ti_orderid = None, 1567 | ti_sku = None, 1568 | ti_name = None, 1569 | ti_category = None, 1570 | ti_price = None, 1571 | ti_quantity = None, 1572 | pp_xoffset_min = None, 1573 | pp_xoffset_max = None, 1574 | pp_yoffset_min = None, 1575 | pp_yoffset_max = None, 1576 | useragent = None, 1577 | br_name = None, 1578 | br_family = None, 1579 | br_version = None, 1580 | br_type = None, 1581 | br_renderengine = None, 1582 | br_lang = None, 1583 | br_features_pdf = Some(true), 1584 | br_features_flash = Some(false), 1585 | br_features_java = None, 1586 | br_features_director = None, 1587 | br_features_quicktime = None, 1588 | br_features_realplayer = None, 1589 | br_features_windowsmedia = None, 1590 | br_features_gears = None, 1591 | br_features_silverlight = None, 1592 | br_cookies = None, 1593 | br_colordepth = None, 1594 | br_viewwidth = None, 1595 | 
br_viewheight = None, 1596 | os_name = None, 1597 | os_family = None, 1598 | os_manufacturer = None, 1599 | os_timezone = None, 1600 | dvce_type = None, 1601 | dvce_ismobile = None, 1602 | dvce_screenwidth = None, 1603 | dvce_screenheight = None, 1604 | doc_charset = None, 1605 | doc_width = None, 1606 | doc_height = None, 1607 | tr_currency = None, 1608 | tr_total_base = None, 1609 | tr_tax_base = None, 1610 | tr_shipping_base = None, 1611 | ti_currency = None, 1612 | ti_price_base = None, 1613 | base_currency = None, 1614 | geo_timezone = None, 1615 | mkt_clickid = None, 1616 | mkt_network = None, 1617 | etl_tags = None, 1618 | dvce_sent_tstamp = None, 1619 | refr_domain_userid = None, 1620 | refr_dvce_tstamp = None, 1621 | derived_contexts = Contexts(List()), 1622 | domain_sessionid = Some("2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1"), 1623 | derived_tstamp = Some(Instant.parse("2013-11-26T00:03:57.886Z")), 1624 | event_vendor = Some("com.snowplowanalytics.snowplow"), 1625 | event_name = Some("link_click"), 1626 | event_format = Some("jsonschema"), 1627 | event_version = Some("1-0-0"), 1628 | event_fingerprint = Some("e3dbfa9cca0412c3d4052863cefb547f"), 1629 | true_tstamp = Some(Instant.parse("2013-11-26T00:03:57.886Z")) 1630 | ) 1631 | } 1632 | -------------------------------------------------------------------------------- /src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingErrorSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | package com.snowplowanalytics.snowplow.analytics.scalasdk 14 | 15 | import cats.data.NonEmptyList 16 | import io.circe.{Decoder, DecodingFailure, Json, parser} 17 | import io.circe.syntax._ 18 | import io.circe.parser._ 19 | import com.snowplowanalytics.snowplow.analytics.scalasdk.validate.FIELD_SIZES 20 | import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError._ 21 | import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError.RowDecodingErrorInfo._ 22 | import org.specs2.Specification 23 | 24 | import java.time.Instant 25 | import cats.data.Validated.{Invalid, Valid} 26 | 27 | import java.util.UUID 28 | 29 | class ParsingErrorSpec extends Specification { 30 | def is = s2""" 31 | ParsingError encoder-decoder 32 | works correctly with NotTSV error $e1 33 | works correctly with FieldNumberMismatch error $e2 34 | works correctly with RowDecodingError $e3 35 | works correctly with TSV oversized columns $e4 36 | works correctly with JSON oversized columns $e5 37 | """ 38 | def e1 = { 39 | val errorJson = parseJson( 40 | """ 41 | |{ 42 | | "type": "NotTSV" 43 | |} 44 | """.stripMargin 45 | ) 46 | 47 | val decoded = decodeJson[ParsingError](errorJson) 48 | val encoded = decoded.asJson 49 | 50 | (decoded must beEqualTo(NotTSV)) and (encoded must beEqualTo(errorJson)) 51 | } 52 | 53 | def e2 = { 54 | val errorJson = parseJson( 55 | """ 56 | |{ 57 | | "type": "FieldNumberMismatch", 58 | | "fieldCount": 120 59 | |} 60 | """.stripMargin 61 | ) 62 | 63 | val decoded = decodeJson[ParsingError](errorJson) 64 | val encoded = decoded.asJson 65 | 66 | (decoded must beEqualTo(FieldNumberMismatch(120))) and (encoded must beEqualTo(errorJson)) 67 | } 68 | 69 | def e3 = { 70 | val errorJson = parseJson( 71 | """ 72 | |{ 73 | | "type": "RowDecodingError", 74 | | "errors": [ 75 | | { 76 | | "type": "InvalidValue", 77 | | "key": "exampleKey", 78 | | "value": "exampleValue", 79 | | "message": "exampleMessage" 80 | | }, 81 | | { 82 | | "type": "UnhandledRowDecodingError", 83 | | "message": "exampleError" 84 | | } 85 | | ] 86 | |} 87 | """.stripMargin 88 | ) 89 | 90 | val decoded = decodeJson[ParsingError](errorJson) 91 | val encoded = decoded.asJson 92 | 93 | val expected = RowDecodingError( 94 | NonEmptyList.of( 95 | InvalidValue(Symbol("exampleKey"), "exampleValue", "exampleMessage"), 96 | UnhandledRowDecodingError("exampleError") 97 | ) 98 | ) 99 | 100 | (decoded must beEqualTo(expected)) and (encoded must beEqualTo(errorJson)) 101 | } 102 | 103 | def e4 = { 104 | // no field length validation since version 3.1.0 105 | val badEvent = Event.minimal(UUID.randomUUID(), Instant.now(), "v" * 101, "v_etl").copy(geo_country = Some("sssss")) 106 | (Event.parser(FIELD_SIZES).parse(badEvent.toTsv) must haveClass[Valid[_]]) and 107 | (Event.parser(Map.empty).parse(badEvent.toTsv) must haveClass[Valid[_]]) 108 | } 109 | 110 | def e5 = 111 | // no field length validation since version 3.1.0 112 | parser.decode[Event](s"""{ 113 | "app_id" : "bbb05861-0f11-4986-b23b-87e6e22609b1", 114 | "collector_tstamp" : "2021-12-06T15:47:07.920430Z", 115 | "event_id" : "bbb05861-0f11-4986-b23b-87e6e22609be", 116 | "v_collector" : "${"v" * 101}", 117 | "v_etl" : "v_etl", 118 | "geo_country" : "sssss", 119 | "contexts" : {}, 120 | "unstruct_event": {}, 121 | "derived_contexts" : {} 122 | }""".stripMargin) must beRight 123 | 124 | private def parseJson(jsonStr: String): Json = 125 | parse(jsonStr).getOrElse(throw new RuntimeException("Failed to parse expected JSON.")) 126 | 127 | private def decodeJson[A: Decoder](json: Json): A = 128 | 
json.as[A].getOrElse(throw new RuntimeException("Failed to decode to ParsingError.")) 129 | } 130 | -------------------------------------------------------------------------------- /src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/SnowplowEventSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | 14 | package com.snowplowanalytics.snowplow.analytics.scalasdk 15 | 16 | // java 17 | import java.time.Instant 18 | import java.util.UUID 19 | import java.nio.ByteBuffer 20 | import java.nio.charset.StandardCharsets 21 | 22 | // cats 23 | import cats.data.Validated.{Invalid, Valid} 24 | import cats.data.NonEmptyList 25 | import cats.syntax.either._ 26 | 27 | // circe 28 | import io.circe.{Decoder, Encoder, Json, JsonObject} 29 | import io.circe.syntax._ 30 | import io.circe.parser._ 31 | import io.circe.generic.semiauto._ 32 | import io.circe.literal._ 33 | 34 | // Specs2 35 | import org.specs2.mutable.Specification 36 | 37 | // Iglu 38 | import com.snowplowanalytics.iglu.core.{SchemaKey, SelfDescribingData} 39 | 40 | /** 41 | * Tests SnowplowEvent case classes 42 | */ 43 | class SnowplowEventSpec extends Specification { 44 | import EventSpec._ 45 | 46 | "Contexts toShreddedJson" should { 47 | "return a map of JSON entities, keyed by column name" in { 48 | 49 | val sdd1 = SelfDescribingData[Json](SchemaKey.fromUri("iglu:myvendor1/myname1/jsonschema/1-2-3").toOption.get, json"""{"xyz": 42}""") 50 | val sdd2 = 51 | SelfDescribingData[Json](SchemaKey.fromUri("iglu:myvendor2/myname2/jsonschema/2-3-4").toOption.get, json"""{"abc": true}""") 52 | 53 | val input = SnowplowEvent.Contexts(List(sdd1, sdd2)) 54 | 55 | val result = input.toShreddedJson 56 | 57 | val expected = Map( 58 | "contexts_myvendor1_myname1_1" -> json"""[{"_schema_version": "1-2-3", "xyz": 42}]""", 59 | "contexts_myvendor2_myname2_2" -> json"""[{"_schema_version": "2-3-4", "abc": true}]""" 60 | ) 61 | 62 | result must beEqualTo(expected) 63 | 64 | } 65 | 66 | "return a map of JSON entities for all types of JSON value (object, array, string, number, boolean, null)" in { 67 | 68 | def sdd(version: Int, v: Json): SelfDescribingData[Json] = 69 | SelfDescribingData[Json](SchemaKey.fromUri(s"iglu:myvendor/myname/jsonschema/$version-0-0").toOption.get, v) 70 | 71 | val input = SnowplowEvent.Contexts( 72 | List( 73 | sdd(1, json"""{"xyz": 123}"""), 74 | sdd(2, json"""[1, 2, 3]"""), 75 | sdd(3, json""""foo""""), 76 | sdd(4, json"""42"""), 77 | sdd(5, json"""true"""), 78 | sdd(6, json"""null""") 79 | ) 80 | ) 81 | 82 | val result = input.toShreddedJson 83 | 84 | val expected = Map( 85 | "contexts_myvendor_myname_1" -> json"""[{"_schema_version": "1-0-0", "xyz": 123}]""", 86 | "contexts_myvendor_myname_2" -> json"""[[1, 2, 3]]""", 87 | "contexts_myvendor_myname_3" -> 
json"""["foo"]""", 88 | "contexts_myvendor_myname_4" -> json"""[42]""", 89 | "contexts_myvendor_myname_5" -> json"""[true]""", 90 | "contexts_myvendor_myname_6" -> json"""[null]""" 91 | ) 92 | 93 | result must beEqualTo(expected) 94 | 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoderSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | 14 | package com.snowplowanalytics.snowplow.analytics.scalasdk.decode 15 | 16 | // java 17 | import java.time.Instant 18 | import java.util.UUID 19 | 20 | // circe 21 | import io.circe.{Json, JsonObject} 22 | import io.circe.syntax._ 23 | import io.circe.parser._ 24 | 25 | // cats 26 | import cats.syntax.either._ 27 | 28 | // Specs2 29 | import org.specs2.mutable.Specification 30 | 31 | // Iglu 32 | import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} 33 | 34 | // This library 35 | import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.{Contexts, UnstructEvent} 36 | import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError.RowDecodingErrorInfo._ 37 | 38 | /** 39 | * Tests ValueDecoder class 40 | */ 41 | class ValueDecoderSpec extends Specification { 42 | 43 | "The ValueDecoder class" should { 44 | "parse String and Option[String] values" in { 45 | ValueDecoder[String].parse(Symbol("key"), "", None) mustEqual InvalidValue(Symbol("key"), "", "Field key cannot be empty").asLeft 46 | ValueDecoder[String].parse(Symbol("key"), "value", None) mustEqual "value".asRight 47 | ValueDecoder[String].parse(Symbol("key"), "value", Some(5)) mustEqual "value".asRight 48 | ValueDecoder[String].parse(Symbol("key"), "value", Some(4)) mustEqual "valu".asRight 49 | ValueDecoder[Option[String]].parse(Symbol("key"), "", None) mustEqual None.asRight 50 | ValueDecoder[Option[String]].parse(Symbol("key"), "value", None) mustEqual Some("value").asRight 51 | ValueDecoder[Option[String]].parse(Symbol("key"), "value", Some(5)) mustEqual Some("value").asRight 52 | ValueDecoder[Option[String]].parse(Symbol("key"), "value", Some(4)) mustEqual Some("valu").asRight 53 | } 54 | 55 | "parse Option[Int] values" in { 56 | ValueDecoder[Option[Int]].parse(Symbol("key"), "", None) mustEqual None.asRight 57 | ValueDecoder[Option[Int]].parse(Symbol("key"), "42", None) mustEqual Some(42).asRight 58 | ValueDecoder[Option[Int]].parse(Symbol("key"), "42", None) mustEqual Some(42).asRight 59 | ValueDecoder[Option[Int]].parse(Symbol("key"), "value", None) mustEqual InvalidValue(Symbol("key"), 60 | "value", 61 | "Cannot parse key key into integer" 62 | ).asLeft 63 | } 64 | 65 | "parse UUID values" in { 66 | 
ValueDecoder[UUID].parse(Symbol("key"), "", None) mustEqual InvalidValue(Symbol("key"), "", "Field key cannot be empty").asLeft 67 | ValueDecoder[UUID].parse(Symbol("key"), "d2161fd1-ffed-41df-ac3e-a729012105f5", None) mustEqual UUID 68 | .fromString("d2161fd1-ffed-41df-ac3e-a729012105f5") 69 | .asRight 70 | ValueDecoder[UUID].parse(Symbol("key"), "value", None) mustEqual InvalidValue(Symbol("key"), 71 | "value", 72 | "Cannot parse key key into UUID" 73 | ).asLeft 74 | } 75 | 76 | "parse Option[Boolean] values" in { 77 | ValueDecoder[Option[Boolean]].parse(Symbol("key"), "", None) mustEqual None.asRight 78 | ValueDecoder[Option[Boolean]].parse(Symbol("key"), "0", None) mustEqual Some(false).asRight 79 | ValueDecoder[Option[Boolean]].parse(Symbol("key"), "1", None) mustEqual Some(true).asRight 80 | ValueDecoder[Option[Boolean]].parse(Symbol("key"), "value", None) mustEqual InvalidValue( 81 | Symbol("key"), 82 | "value", 83 | "Cannot parse key key into boolean" 84 | ).asLeft 85 | } 86 | 87 | "parse Option[Double] values" in { 88 | ValueDecoder[Option[Double]].parse(Symbol("key"), "", None) mustEqual None.asRight 89 | ValueDecoder[Option[Double]].parse(Symbol("key"), "42.5", None) mustEqual Some(42.5).asRight 90 | ValueDecoder[Option[Double]].parse(Symbol("key"), "value", None) mustEqual InvalidValue(Symbol("key"), 91 | "value", 92 | "Cannot parse key key into double" 93 | ).asLeft 94 | } 95 | 96 | "parse Instant and Option[Instant] values" in { 97 | ValueDecoder[Instant].parse(Symbol("key"), "", None) mustEqual InvalidValue(Symbol("key"), "", "Field key cannot be empty").asLeft 98 | ValueDecoder[Instant] 99 | .parse(Symbol("key"), "2013-11-26 00:03:57.885", None) mustEqual Instant.parse("2013-11-26T00:03:57.885Z").asRight 100 | ValueDecoder[Instant].parse(Symbol("key"), "value", None) mustEqual InvalidValue(Symbol("key"), 101 | "value", 102 | "Cannot parse key key into datetime" 103 | ).asLeft 104 | ValueDecoder[Option[Instant]].parse(Symbol("key"), "", None) mustEqual None.asRight 105 | ValueDecoder[Option[Instant]].parse(Symbol("key"), "2013-11-26 00:03:57.885", None) mustEqual Some( 106 | Instant.parse("2013-11-26T00:03:57.885Z") 107 | ).asRight 108 | ValueDecoder[Option[Instant]].parse(Symbol("key"), "value", None) mustEqual InvalidValue( 109 | Symbol("key"), 110 | "value", 111 | "Cannot parse key key into datetime" 112 | ).asLeft 113 | } 114 | 115 | "parse Contexts values" in { 116 | val validContexts = 117 | """{ 118 | "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", 119 | "data": [ 120 | { 121 | "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", 122 | "data": { 123 | "genre": "blog", 124 | "inLanguage": "en-US", 125 | "datePublished": "2014-11-06T00:00:00Z", 126 | "author": "Fred Blundun", 127 | "breadcrumb": [ 128 | "blog", 129 | "releases" 130 | ], 131 | "keywords": [ 132 | "snowplow", 133 | "javascript", 134 | "tracker", 135 | "event" 136 | ] 137 | } 138 | } 139 | ] 140 | }""" 141 | val invalidPayloadContexts = 142 | """{ 143 | "schema": "iglu:invalid/schema/jsonschema/1-0-0", 144 | "data": [ 145 | { 146 | "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", 147 | "data": { 148 | "genre": "blog", 149 | "inLanguage": "en-US", 150 | "datePublished": "2014-11-06T00:00:00Z", 151 | "author": "Fred Blundun", 152 | "breadcrumb": [ 153 | "blog", 154 | "releases" 155 | ], 156 | "keywords": [ 157 | "snowplow", 158 | "javascript", 159 | "tracker", 160 | "event" 161 | ] 162 | } 163 | } 164 | ] 165 | }""" 166 | ValueDecoder[Contexts].parse(Symbol("key"), "", None) 
mustEqual Contexts(List()).asRight 167 | ValueDecoder[Contexts].parse(Symbol("key"), validContexts, None) mustEqual Contexts( 168 | List( 169 | SelfDescribingData( 170 | SchemaKey( 171 | "org.schema", 172 | "WebPage", 173 | "jsonschema", 174 | SchemaVer.Full(1, 0, 0) 175 | ), 176 | JsonObject( 177 | ("genre", "blog".asJson), 178 | ("inLanguage", "en-US".asJson), 179 | ("datePublished", "2014-11-06T00:00:00Z".asJson), 180 | ("author", "Fred Blundun".asJson), 181 | ("breadcrumb", List("blog", "releases").asJson), 182 | ("keywords", List("snowplow", "javascript", "tracker", "event").asJson) 183 | ).asJson 184 | ) 185 | ) 186 | ).asRight 187 | ValueDecoder[Contexts].parse(Symbol("key"), invalidPayloadContexts, None) mustEqual InvalidValue( 188 | Symbol("key"), 189 | invalidPayloadContexts, 190 | "Unknown payload: iglu:invalid/schema/jsonschema/1-0-0" 191 | ).asLeft 192 | } 193 | 194 | "parse UnstructEvent values" in { 195 | val validUnstruct = 196 | """{ 197 | "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", 198 | "data": { 199 | "schema": "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", 200 | "data": { 201 | "targetUrl": "http://www.example.com", 202 | "elementClasses": ["foreground"], 203 | "elementId": "exampleLink" 204 | } 205 | } 206 | }""" 207 | val invalidPayloadUnstruct = 208 | """{ 209 | "schema": "iglu:invalid/schema/jsonschema/1-0-0", 210 | "data": { 211 | "schema": "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", 212 | "data": { 213 | "targetUrl": "http://www.example.com", 214 | "elementClasses": ["foreground"], 215 | "elementId": "exampleLink" 216 | } 217 | } 218 | }""" 219 | ValueDecoder[UnstructEvent].parse(Symbol("key"), "", None) mustEqual UnstructEvent(None).asRight 220 | ValueDecoder[UnstructEvent].parse(Symbol("key"), validUnstruct, None) mustEqual UnstructEvent( 221 | Some( 222 | SelfDescribingData( 223 | SchemaKey( 224 | "com.snowplowanalytics.snowplow", 225 | "link_click", 226 | "jsonschema", 227 | SchemaVer.Full(1, 0, 1) 228 | ), 229 | JsonObject( 230 | ("targetUrl", "http://www.example.com".asJson), 231 | ("elementClasses", List("foreground").asJson), 232 | ("elementId", "exampleLink".asJson) 233 | ).asJson 234 | ) 235 | ) 236 | ).asRight 237 | ValueDecoder[UnstructEvent].parse(Symbol("key"), invalidPayloadUnstruct, None) mustEqual InvalidValue( 238 | Symbol("key"), 239 | invalidPayloadUnstruct, 240 | "Unknown payload: iglu:invalid/schema/jsonschema/1-0-0" 241 | ).asLeft 242 | } 243 | } 244 | } 245 | --------------------------------------------------------------------------------
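Taken together, these specs pin down the SDK's consumer-facing contract: Event.parse (or a pre-configured Event.parser) decodes an enriched TSV line into a cats Validated value, Event#toTsv and Event#toJson serialize an event back out, and decode failures surface as a ParsingError. The sketch below shows that contract from an application's point of view. It is a minimal, illustrative sketch only, using just the calls exercised in the tests above; EnrichedTsvConsumer, handle and tsvLine are hypothetical names, not part of the SDK.

import cats.data.Validated.{Invalid, Valid}
import com.snowplowanalytics.snowplow.analytics.scalasdk.Event

object EnrichedTsvConsumer {
  // Hypothetical consumer: build one parser up front and truncate app_id to
  // 10 characters while decoding, mirroring the truncation example in EventSpec.
  private val parser = Event.parser(Map("app_id" -> 10))

  def handle(tsvLine: String): Unit =
    parser.parse(tsvLine) match {
      case Valid(event) =>
        // Lossless JSON rendering, as in EventSpec's event.toJson(false); the
        // property test above guarantees Event.parse(event.toTsv) == Valid(event).
        println(event.toJson(false).noSpaces)
      case Invalid(error) =>
        // error is a ParsingError: NotTSV, FieldNumberMismatch or RowDecodingError
        System.err.println(error)
    }
}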