├── .github
│   └── ISSUE_TEMPLATE
│       ├── bug_report.md
│       └── feature_request.md
├── .gitignore
├── .travis.yml
├── CNAME
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── DaFlow.png
├── LICENSE
├── README.md
├── daflow-commons
│   ├── README.md
│   ├── pom.xml
│   └── src
│       ├── main
│       │   └── scala
│       │       └── com
│       │           └── abhioncbr
│       │               └── daflow
│       │                   └── commons
│       │                       ├── CommonConstants.scala
│       │                       ├── Context.scala
│       │                       ├── ContextConstantEnum.scala
│       │                       ├── ExecutionResult.scala
│       │                       ├── NotificationMessages.scala
│       │                       ├── ProcessFrequencyEnum.scala
│       │                       ├── conf
│       │                       │   ├── DaFlowJobConf.scala
│       │                       │   ├── JobStaticParamConf.scala
│       │                       │   ├── common
│       │                       │   │   ├── DataPath.scala
│       │                       │   │   ├── FieldMappingConf.scala
│       │                       │   │   ├── GeneralParamConf.scala
│       │                       │   │   └── QueryConf.scala
│       │                       │   ├── extract
│       │                       │   │   ├── ExtractConf.scala
│       │                       │   │   └── ExtractionType.scala
│       │                       │   ├── load
│       │                       │   │   ├── LoadConf.scala
│       │                       │   │   ├── LoadType.scala
│       │                       │   │   └── PartitioningDataConf.scala
│       │                       │   └── transform
│       │                       │       └── TransformConf.scala
│       │                       └── util
│       │                           └── FileUtil.scala
│       └── test
│           └── scala
│               └── com
│                   └── abhioncbr
│                       └── daflow
│                           └── commons
│                               ├── CommonSpec.scala
│                               ├── Fixture.scala
│                               └── util
│                                   └── FileUtilSpec.scala
├── daflow-core
│   ├── README.md
│   ├── pom.xml
│   └── src
│       └── main
│           └── scala
│               └── com
│                   └── abhioncbr
│                       └── daflow
│                           └── core
│                               ├── LaunchETLSparkJobExecution.scala
│                               ├── extractData
│                               │   ├── AbstractExtractData.scala
│                               │   ├── ExtractData.scala
│                               │   ├── ExtractDataFromDB.scala
│                               │   ├── ExtractDataFromFileSystem.scala
│                               │   ├── ExtractDataFromHive.scala
│                               │   └── ExtractUtil.scala
│                               ├── loadData
│                               │   ├── LoadData.scala
│                               │   ├── LoadDataIntoFileSystem.scala
│                               │   ├── LoadDataIntoHive.scala
│                               │   └── LoadUtil.scala
│                               ├── transformData
│                               │   ├── Transform.scala
│                               │   ├── TransformData.scala
│                               │   ├── TransformRule.scala
│                               │   ├── TransformStep.scala
│                               │   └── TransformUtil.scala
│                               └── validateData
│                                   ├── ValidateData.scala
│                                   └── ValidateTransformedData.scala
├── daflow-examples
│   ├── README.md
│   ├── daflow-xml-templates
│   │   ├── extract_jdbc_import.xml
│   │   ├── extract_json_import.xml
│   │   ├── multiple_group_name.xml
│   │   └── multiple_transform_rule.xml
│   ├── demo
│   │   ├── daflow-job-xml
│   │   │   └── json_etl_example.xml
│   │   └── sample-data
│   │       └── json_data.json
│   └── scripts
│       ├── execute_etl_feed.sh
│       └── execute_etl_feed_airflow.sh
├── daflow-job-conf
│   ├── daflow-job-conf-xml
│   │   ├── README.md
│   │   ├── daflow-feed-job.xsd
│   │   ├── pom.xml
│   │   └── src
│   │       ├── main
│   │       │   └── scala
│   │       │       └── com
│   │       │           └── abhioncbr
│   │       │               └── daflow
│   │       │                   └── job
│   │       │                       └── conf
│   │       │                           └── xml
│   │       │                               ├── AttributeTags.scala
│   │       │                               ├── NodeTags.scala
│   │       │                               ├── ParseDaFlowJobXml.scala
│   │       │                               ├── ParseDataPath.scala
│   │       │                               ├── ParseExtract.scala
│   │       │                               ├── ParseFieldMapping.scala
│   │       │                               ├── ParseGeneralParams.scala
│   │       │                               ├── ParseJobStaticParam.scala
│   │       │                               ├── ParseLoad.scala
│   │       │                               ├── ParsePartitioningData.scala
│   │       │                               ├── ParseQuery.scala
│   │       │                               ├── ParseTransform.scala
│   │       │                               ├── ParseTransformRule.scala
│   │       │                               └── ParseUtil.scala
│   │       └── test
│   │           └── scala
│   │               └── com
│   │                   └── abhioncbr
│   │                       └── daflow
│   │                           └── job
│   │                               └── conf
│   │                                   └── xml
│   │                                       ├── ParseDaFlowJobXmlSpec.scala
│   │                                       ├── ParseDataPathSpec.scala
│   │                                       ├── ParseExtractSpec.scala
│   │                                       ├── ParseFieldMappingSpec.scala
│   │                                       ├── ParseGeneralParamsSpec.scala
│   │                                       ├── ParseJobStaticParamSpec.scala
│   │                                       ├── ParseLoadSpec.scala
│   │                                       ├── ParsePartitioningRuleSpec.scala
│   │                                       ├── ParseQuerySpec.scala
│   │                                       ├── ParseTransformRuleSpec.scala
│   │                                       ├── ParseTransformSpec.scala
│   │                                       ├── ParseUtilSpec.scala
│   │                                       └── XmlJobConfBase.scala
│   └── daflow-job-conf-yaml
│       └── pom.xml
├── daflow-metrics
│   ├── pom.xml
│   ├── scripts
│   │   └── daflow-feed-stat.sh
│   ├── sql
│   │   └── daflow-feed-stat
│   └── src
│       └── main
│           └── scala
│               └── com
│                   └── abhioncbr
│                       └── daflow
│                           └── metrics
│                               ├── promethus
│                               │   └── PrometheusObject.scala
│                               └── stats
│                                   ├── JobResult.scala
│                                   └── UpdateFeedStats.scala
├── daflow-sql-parser
│   ├── README.md
│   ├── pom.xml
│   └── src
│       ├── main
│       │   └── scala
│       │       └── com
│       │           └── abhioncbr
│       │               └── daflow
│       │                   └── sqlParser
│       │                       ├── QueryDsl.scala
│       │                       └── SQLParser.scala
│       └── test
│           └── scala
│               └── com
│                   └── abhioncbr
│                       └── daflow
│                           └── sqlParser
│                               └── SqlParserSpec.scala
├── docker
│   ├── compose
│   │   ├── docker-compose-daflow.yml
│   │   └── hadoop.env
│   ├── images
│   │   ├── hadoop
│   │   │   ├── base
│   │   │   │   ├── Dockerfile
│   │   │   │   ├── entrypoint.sh
│   │   │   │   ├── export_container_ip.sh
│   │   │   │   └── pom.xml
│   │   │   ├── datanode
│   │   │   │   ├── Dockerfile
│   │   │   │   ├── pom.xml
│   │   │   │   └── run_dn.sh
│   │   │   ├── historyserver
│   │   │   │   ├── Dockerfile
│   │   │   │   ├── pom.xml
│   │   │   │   └── run_history.sh
│   │   │   └── namenode
│   │   │       ├── Dockerfile
│   │   │       ├── pom.xml
│   │   │       └── run_nn.sh
│   │   ├── hive
│   │   │   ├── Dockerfile
│   │   │   ├── conf
│   │   │   │   ├── beeline-log4j2.properties
│   │   │   │   ├── hive-env.sh
│   │   │   │   ├── hive-exec-log4j2.properties
│   │   │   │   ├── hive-log4j2.properties
│   │   │   │   ├── hive-site.xml
│   │   │   │   ├── ivysettings.xml
│   │   │   │   └── llap-daemon-log4j2.properties
│   │   │   ├── entrypoint.sh
│   │   │   ├── pom.xml
│   │   │   └── startup.sh
│   │   ├── pom.xml
│   │   └── spark
│   │       ├── adhoc
│   │       │   ├── Dockerfile
│   │       │   ├── adhoc.sh
│   │       │   └── pom.xml
│   │       ├── base
│   │       │   ├── Dockerfile
│   │       │   ├── execute-step.sh
│   │       │   ├── finish-step.sh
│   │       │   ├── pom.xml
│   │       │   └── wait-for-step.sh
│   │       ├── master
│   │       │   ├── Dockerfile
│   │       │   ├── master.sh
│   │       │   └── pom.xml
│   │       └── worker
│   │           ├── Dockerfile
│   │           ├── pom.xml
│   │           └── worker.sh
│   ├── scripts
│   │   └── setup_demo_container.sh
│   ├── setup_demo.sh
│   └── stop_demo.sh
├── pom.xml
└── style
    ├── checkstyle-suppressions.xml
    ├── checkstyle.xml
    ├── eclipse-java-google-style.xml
    ├── intellij-java-google-style.xml
    └── scalastyle-config.xml
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | labels:
5 |
6 | ---
7 |
8 | **Describe the bug**
9 | A clear and concise description of what the bug is.
10 |
11 | **To Reproduce**
12 | Steps to reproduce the behavior:
13 | 1. Go to '...'
14 | 2. Click on '....'
15 | 3. Scroll down to '....'
16 | 4. See error
17 |
18 | **Expected behavior**
19 | A clear and concise description of what you expected to happen.
20 |
21 | **Screenshots**
22 | If applicable, add screenshots to help explain your problem.
23 |
24 | **Desktop (please complete the following information):**
25 | - OS: [e.g. iOS]
26 | - Browser [e.g. chrome, safari]
27 | - Version [e.g. 22]
28 |
29 | **Smartphone (please complete the following information):**
30 | - Device: [e.g. iPhone6]
31 | - OS: [e.g. iOS8.1]
32 | - Browser [e.g. stock browser, safari]
33 | - Version [e.g. 22]
34 |
35 | **Additional context**
36 | Add any other context about the problem here.
37 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | labels:
5 |
6 | ---
7 |
8 | **Is your feature request related to a problem? Please describe.**
9 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
10 |
11 | **Describe the solution you'd like**
12 | A clear and concise description of what you want to happen.
13 |
14 | **Describe alternatives you've considered**
15 | A clear and concise description of any alternative solutions or features you've considered.
16 |
17 | **Additional context**
18 | Add any other context or screenshots about the feature request here.
19 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | .DS_Store
3 | local
4 | target
5 | project/target
6 | project/project/target
7 | daflow-core/target
8 | daflow-core/.DS_Store
9 | daflow-core/src/.DS_Store
10 | daflow-core/src/main/.DS_Store
11 | daflow-core/src/test/.DS_Store
12 | daflow-core/src/main/resources/.DS_Store
13 | daflow-commons/target
14 | daflow-commons/.DS_Store
15 | daflow-commons/src/.DS_Store
16 | daflow-commons/src/main/.DS_Store
17 | daflow-core/src/test/.DS_Store
18 | daflow-commons/src/main/resources/.DS_Store
19 | daflow-core/*.iml
20 | daflow-commons/*.iml
21 | daflow-metrics/*.iml
22 | daflow-sql-parser/*.iml
23 | daflow-job-conf/daflow-job-conf-xml/*.iml
24 | daflow-job-conf/daflow-job-conf-yaml/*.iml
25 | docker/images/*.iml
26 | docker/images/hive/*.iml
27 | docker/images/hadoop/base/*.iml
28 | docker/images/hadoop/datanode/*.iml
29 | docker/images/hadoop/namenode/*.iml
30 | docker/images/hadoop/historyserver/*.iml
31 | docker/images/spark/base/*.iml
32 | docker/images/spark/master/*.iml
33 | docker/images/spark/worker/*.iml
34 | daflow-examples/demo/aritfacts/*
35 | daflow-examples/demo/sample-data/daflow-result/*
36 | daflow-examples/demo/sample-feed-stats/*
37 |
38 | *.iml
39 | **/*.iml
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 |
3 | jdk:
4 | - oraclejdk8
5 |
6 | sudo: required
7 |
8 | services:
9 | - docker
10 |
11 | cache:
12 | directories:
13 | - $HOME/.m2
14 |
15 | script:
16 | - mvn -Ddocker.build.skip=$DOCKER_BUILD_SKIP -Ddocker.reg=$DOCKER_REGISTRY -Ddocker.reg.username=$DOCKER_USERNAME -Ddocker.reg.password=$DOCKER_PASSWORD -q clean install
17 |
18 | after_success:
19 | - mvn -q clean cobertura:cobertura
20 | - bash <(curl -s https://codecov.io/bash)
21 |
--------------------------------------------------------------------------------
/CNAME:
--------------------------------------------------------------------------------
1 | daflow.sparsecode.io
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at abhioncbr@yahoo.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at abhioncbr@yahoo.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/DaFlow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sparsecode/DaFlow/068ec7582ef58cbc6c782d2bc19a43aca2dead81/DaFlow.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This software is licensed under the Apache 2 license, quoted below.
2 |
3 | Copyright (C) 2018-2019 Abhishek Sharma
4 |
5 | Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 | use this file except in compliance with the License. You may obtain a copy of
7 | the License at
8 |
9 | [http://www.apache.org/licenses/LICENSE-2.0]
10 |
11 | Unless required by applicable law or agreed to in writing, software
12 | distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 | WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 | License for the specific language governing permissions and limitations under
15 | the License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # DaFlow [Data Flow(ETL) Framework]
4 |
5 | [](https://travis-ci.org/abhioncbr/DaFlow/)
6 | [](https://www.apache.org/licenses/LICENSE-2.0.txt)
7 | [](https://codecov.io/gh/abhioncbr/DaFlow)
8 | [](https://codeclimate.com/github/abhioncbr/DaFlow)
9 |
10 | An Apache Spark based data flow (ETL) framework that supports multiple read and write destinations of different types, as well as multiple categories of transformation rules.
--------------------------------------------------------------------------------
/daflow-commons/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sparsecode/DaFlow/068ec7582ef58cbc6c782d2bc19a43aca2dead81/daflow-commons/README.md
--------------------------------------------------------------------------------
/daflow-commons/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |   <parent>
 6 |     <groupId>com.abhioncbr.daflow</groupId>
 7 |     <artifactId>daflow</artifactId>
 8 |     <version>${revision}</version>
 9 |   </parent>
10 |   <modelVersion>4.0.0</modelVersion>
11 |
12 |   <packaging>jar</packaging>
13 |   <artifactId>daflow-commons</artifactId>
14 |   <name>daflow-commons</name>
15 |   <version>${daflow.common.version}</version>
16 |
17 |   <licenses>
18 |     <license>
19 |       <name>Apache License, Version 2.0</name>
20 |       <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
21 |       <distribution>repo</distribution>
22 |       <comments>A business-friendly OSS license</comments>
23 |     </license>
24 |   </licenses>
25 |
26 |   <repositories>
27 |     <repository>
28 |       <id>scala-tools.org</id>
29 |       <name>Scala-Tools Maven2 Repository</name>
30 |       <url>http://scala-tools.org/repo-releases</url>
31 |     </repository>
32 |   </repositories>
33 |
34 |   <pluginRepositories>
35 |     <pluginRepository>
36 |       <id>scala-tools.org</id>
37 |       <name>Scala-Tools Maven2 Repository</name>
38 |       <url>http://scala-tools.org/repo-releases</url>
39 |     </pluginRepository>
40 |   </pluginRepositories>
41 |
42 |   <build>
43 |     <plugins>
44 |       <plugin>
45 |         <groupId>org.scala-tools</groupId>
46 |         <artifactId>maven-scala-plugin</artifactId>
47 |         <configuration>
48 |           <scalaVersion>${scala.version}</scalaVersion>
49 |         </configuration>
50 |       </plugin>
51 |     </plugins>
52 |   </build>
53 | </project>
--------------------------------------------------------------------------------
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/CommonConstants.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons
19 |
20 | object CommonConstants {
21 | val DIRECTORY_SEPARATOR: Char = '/'
22 | }
23 |
--------------------------------------------------------------------------------
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/Context.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons
19 |
20 | object Context {
21 | private val contextualObjects = scala.collection.mutable.Map[ContextConstantEnum.constant, Any]()
22 |
23 | def addContextualObject[T](key: ContextConstantEnum.constant, obj: T): Unit = {
24 | contextualObjects +=(key -> obj)
25 | }
26 |
27 | def getContextualObject[T](key: ContextConstantEnum.constant): T = {
28 | val output = contextualObjects.getOrElse(key, None) // an absent key yields None, which the unchecked cast below passes through as T
29 | output.asInstanceOf[T]
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
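A minimal usage sketch for the Context registry above (the map value and key choice are illustrative; OTHER_PARAM is one of the constants defined in ContextConstantEnum.scala below):

    import com.abhioncbr.daflow.commons.Context
    import com.abhioncbr.daflow.commons.ContextConstantEnum.OTHER_PARAM

    // register an object once, typically at job start-up
    Context.addContextualObject[Map[String, String]](OTHER_PARAM, Map("env" -> "dev"))

    // retrieve it anywhere later in the job
    val params = Context.getContextualObject[Map[String, String]](OTHER_PARAM)
    // params("env") == "dev"; a key that was never registered comes back as None cast to T,
    // so callers must register a value before reading it
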
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/ContextConstantEnum.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons
19 |
20 | object ContextConstantEnum extends Enumeration{
21 | type constant = Value
22 | val START_DATE, END_DATE,
23 | HADOOP_CONF, SPARK_CONTEXT, SQL_CONTEXT,
24 | JOB_STATIC_PARAM_CONF, EXTRACT_CONF, TRANSFORM_CONF, LOAD_CONF,
25 | SCHEMA, OTHER_PARAM = Value
26 | }
27 |
--------------------------------------------------------------------------------
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/ExecutionResult.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons
19 |
20 | import org.apache.spark.sql.DataFrame
21 |
22 | case class ExecutionResult(feedName: String, resultDF: DataFrame, otherAttributes: Option[Map[String, Any]] = None)
23 |
--------------------------------------------------------------------------------
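A short sketch of how an ExecutionResult might be built from a DataFrame (the local SparkSession, feed name, and "rowCount" attribute are assumptions for illustration):

    import com.abhioncbr.daflow.commons.ExecutionResult
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").appName("execution-result-demo").getOrCreate()
    import spark.implicits._

    val df = Seq(("u1", 10), ("u2", 20)).toDF("id", "score")
    // otherAttributes is a free-form bag for anything a later stage may need
    val result = ExecutionResult("demo-feed", df, Some(Map("rowCount" -> df.count())))
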
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/NotificationMessages.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons
19 |
20 | object NotificationMessages {
21 | val fileDoesNotExist: String => String =
22 | (filePath: String) => { s"Provided file path '$filePath' doesn't exist." }
23 |
24 | val jobXmlFileDoesNotExist: String => String =
25 | (filePath: String) => { s"Not able to load job xml file. Provided path: '$filePath'" }
26 |
27 | val exceptionMessage: Exception => String =
28 | (exception: Exception) => { s"Exception message: ${exception.getMessage}" }
29 |
30 | val unknownXMLEntity: String = "Unknown entity found instead of ''"
31 | val exceptionWhileParsing: String = "Exception while parsing job xml file. Please validate xml."
32 |
33 | // extract
34 | val extractNotSupported: String => String =
35 | (extractType: String) => { s"extracting data from $extractType is not supported right now" }
36 | }
37 |
--------------------------------------------------------------------------------
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/ProcessFrequencyEnum.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons
19 |
20 | object ProcessFrequencyEnum extends Enumeration {
21 | type frequencyType = Value
22 | val ONCE, HOURLY, DAILY, WEEKLY, MONTHLY, YEARLY, DATE_RANGE = Value
23 |
24 | def getProcessFrequencyEnum(frequencyString: String): ProcessFrequencyEnum.frequencyType = {
25 | val processFrequencyEnum = frequencyString match {
26 | case "ONCE" => ProcessFrequencyEnum.ONCE
27 | case "HOURLY" => ProcessFrequencyEnum.HOURLY
28 | case "DAILY" => ProcessFrequencyEnum.DAILY
29 | case "WEEKLY" => ProcessFrequencyEnum.WEEKLY
30 | case "MONTHLY" => ProcessFrequencyEnum.MONTHLY
31 | case "YEARLY" => ProcessFrequencyEnum.YEARLY
32 | case "DATE_RANGE" => ProcessFrequencyEnum.DATE_RANGE
33 | case _ => throw new RuntimeException(s"'$frequencyString', process frequency not supported.")
34 | }
35 | processFrequencyEnum
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
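Usage is a straight string-to-enum lookup, with unsupported strings throwing, as this sketch shows:

    import com.abhioncbr.daflow.commons.ProcessFrequencyEnum

    val freq = ProcessFrequencyEnum.getProcessFrequencyEnum("DAILY") // ProcessFrequencyEnum.DAILY
    // ProcessFrequencyEnum.getProcessFrequencyEnum("QUARTERLY") would throw
    // RuntimeException: "'QUARTERLY', process frequency not supported."
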
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/conf/DaFlowJobConf.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.conf
19 |
20 | import com.abhioncbr.daflow.commons.conf.extract.ExtractConf
21 | import com.abhioncbr.daflow.commons.conf.load.LoadConf
22 | import com.abhioncbr.daflow.commons.conf.transform.TransformConf
23 |
24 | case class DaFlowJobConf(jobStaticParam: JobStaticParamConf, extract: ExtractConf, transform: TransformConf, load: LoadConf)
25 |
--------------------------------------------------------------------------------
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/conf/JobStaticParamConf.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.conf
19 |
20 | import com.abhioncbr.daflow.commons.ProcessFrequencyEnum
21 | import com.abhioncbr.daflow.commons.conf.common.GeneralParamConf
22 |
23 | case class JobStaticParamConf(processFrequency: ProcessFrequencyEnum.frequencyType, jobName: String, publishStats: Boolean,
24 | otherParams: Option[Array[GeneralParamConf]] = None)
25 |
--------------------------------------------------------------------------------
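A minimal sketch of constructing the static job parameters (the job name is illustrative):

    import com.abhioncbr.daflow.commons.ProcessFrequencyEnum
    import com.abhioncbr.daflow.commons.conf.JobStaticParamConf

    val staticParams = JobStaticParamConf(
      processFrequency = ProcessFrequencyEnum.DAILY,
      jobName = "json_etl_example",
      publishStats = false) // otherParams defaults to None
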
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/conf/common/DataPath.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.conf.common
19 |
20 | case class DataPath(pathPrefix: Option[String], cataloguePatterns: Option[Array[PathInfixParam]] = None,
21 | feedPattern: Option[PathInfixParam] = None, fileName: Option[FileNameParam] = None)
22 |
23 | case class PathInfixParam(order: Option[Int] = None, infixPattern: String,
24 | formatInfix: Option[Boolean] = Some(false), formatInfixArgs: Option[Array[GeneralParamConf]] = None)
25 |
26 | case class FileNameParam(fileNamePrefix: Option[String] = None, fileNameSuffix: Option[String] = None,
27 | fileNameSeparator: Option[String] = Some("."))
28 |
--------------------------------------------------------------------------------
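A sketch of how these pieces compose into a single path (all values are hypothetical; FileUtilSpec.scala below shows the exact rendering rules):

    import com.abhioncbr.daflow.commons.conf.common.{DataPath, FileNameParam, PathInfixParam}

    // renders as roughly "/data/raw/events/part.json" via FileUtil.getFilePathString
    val dataPath = DataPath(
      pathPrefix = Some("/data"),
      cataloguePatterns = Some(Array(PathInfixParam(infixPattern = "raw"))),
      feedPattern = Some(PathInfixParam(infixPattern = "events")),
      fileName = Some(FileNameParam(Some("part"), Some("json"))))
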
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/conf/common/FieldMappingConf.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.conf.common
19 |
20 | case class FieldMappingConf(sourceFieldName: String, targetFieldName: String)
21 |
--------------------------------------------------------------------------------
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/conf/common/GeneralParamConf.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.conf.common
19 |
20 | case class GeneralParamConf(order: Int, paramName: String, paramValue: String, paramDefaultValue: String)
21 |
22 |
--------------------------------------------------------------------------------
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/conf/common/QueryConf.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.conf.common
19 |
20 | case class QueryConf(queryFile: QueryFilesConf, queryArgs: Option[Array[GeneralParamConf]])
21 | case class QueryFilesConf(configurationFile: Option[DataPath], queryFile: Option[DataPath])
22 |
--------------------------------------------------------------------------------
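A hedged sketch of a QueryConf wiring a DB properties file and a SQL file together (the paths and the "date" argument are made up for illustration):

    import com.abhioncbr.daflow.commons.conf.common.{DataPath, GeneralParamConf, QueryConf, QueryFilesConf}

    val query = QueryConf(
      queryFile = QueryFilesConf(
        configurationFile = Some(DataPath(pathPrefix = Some("/daflow/conf/db.properties"))),
        queryFile = Some(DataPath(pathPrefix = Some("/daflow/sql/events.sql")))),
      queryArgs = Some(Array(GeneralParamConf(1, "date", "{start_date}", ""))))
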
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/conf/extract/ExtractConf.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.conf.extract
19 |
20 | import com.abhioncbr.daflow.commons.conf.common.DataPath
21 | import com.abhioncbr.daflow.commons.conf.common.QueryConf
22 |
23 | case class ExtractConf(feeds: Array[ExtractFeedConf])
24 | case class ExtractFeedConf(extractFeedName: String, extractionType: ExtractionType.valueType,
25 | extractionAttributesMap: Map[String, String], dataPath: Option[DataPath], query: Option[QueryConf],
26 | validateExtractedData: Boolean)
27 |
--------------------------------------------------------------------------------
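A sketch of a single JDBC extract feed (the feed name and attribute keys are illustrative, not a documented schema):

    import com.abhioncbr.daflow.commons.conf.extract.{ExtractConf, ExtractFeedConf, ExtractionType}

    val feed = ExtractFeedConf(
      extractFeedName = "user-events",
      extractionType = ExtractionType.JDBC,
      extractionAttributesMap = Map("databaseType" -> "mysql"),
      dataPath = None,
      query = None, // a real JDBC feed would carry a QueryConf here
      validateExtractedData = true)
    val extract = ExtractConf(feeds = Array(feed))
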
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/conf/extract/ExtractionType.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.conf.extract
19 |
20 | object ExtractionType extends Enumeration {
21 | type valueType = Value
22 | val JDBC, HIVE, FILE_SYSTEM, UNSUPPORTED = Value
23 |
24 | def getValueType(valueTypeString: String): ExtractionType.valueType = {
25 | val valueType = valueTypeString match {
26 | case "JDBC" => ExtractionType.JDBC
27 | case "HIVE" => ExtractionType.HIVE
28 | case "FILESYSTEM" => ExtractionType.FILE_SYSTEM
29 | case "UNSUPPORTED" => ExtractionType.UNSUPPORTED
30 | }
31 | valueType
32 | }
33 |
34 | def getDataValue(valueType: ExtractionType.valueType): String = {
35 | val output = valueType match {
36 | case JDBC => "JDBC"
37 | case HIVE => "HIVE"
38 | case FILE_SYSTEM => "FILE_SYSTEM"
39 | case UNSUPPORTED => "UNSUPPORTED"
40 | }
41 | output
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
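Note the parse/print asymmetry in this enum: getValueType accepts "FILESYSTEM", while getDataValue prints FILE_SYSTEM as "FILE_SYSTEM" (LoadType below prints "FILESYSTEM"), so the two functions do not round-trip for that member:

    import com.abhioncbr.daflow.commons.conf.extract.ExtractionType

    val t = ExtractionType.getValueType("FILESYSTEM") // ExtractionType.FILE_SYSTEM
    ExtractionType.getDataValue(t)                    // "FILE_SYSTEM", not "FILESYSTEM"
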
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/conf/load/LoadConf.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.conf.load
19 |
20 | import com.abhioncbr.daflow.commons.conf.common.DataPath
21 |
22 | case class LoadConf(feeds: Array[LoadFeedConf])
23 | case class LoadFeedConf(loadFeedName: String, loadType: LoadType.valueType, attributesMap: Map[String, String],
24 | dataPath: DataPath, partitioningData: Option[PartitioningDataConf] )
25 |
--------------------------------------------------------------------------------
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/conf/load/LoadType.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.conf.load
19 |
20 | object LoadType extends Enumeration {
21 | type valueType = Value
22 | val JDBC, HIVE, FILE_SYSTEM = Value
23 |
24 | def getValueType(valueTypeString: String): LoadType.valueType = {
25 | val valueType = valueTypeString match {
26 | case "JDBC" => LoadType.JDBC
27 | case "HIVE" => LoadType.HIVE
28 | case "FILESYSTEM" => LoadType.FILE_SYSTEM
29 | }
30 | valueType
31 | }
32 |
33 | def getDataValue(valueType: LoadType.valueType): String = {
34 | val output = valueType match {
35 | case JDBC => "JDBC"
36 | case HIVE => "HIVE"
37 | case FILE_SYSTEM => "FILESYSTEM"
38 | }
39 | output
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/conf/load/PartitioningDataConf.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.conf.load
19 |
20 | import com.abhioncbr.daflow.commons.conf.common.GeneralParamConf
21 |
22 | case class PartitioningDataConf(coalesce: Boolean, overwrite: Boolean, coalesceCount: Int,
23 | partitionColumns: List[GeneralParamConf])
24 |
--------------------------------------------------------------------------------
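A sketch combining LoadFeedConf and PartitioningDataConf for a date-partitioned Hive load (the path, feed name, and column values are illustrative):

    import com.abhioncbr.daflow.commons.conf.common.{DataPath, GeneralParamConf}
    import com.abhioncbr.daflow.commons.conf.load.{LoadConf, LoadFeedConf, LoadType, PartitioningDataConf}

    val partitioning = PartitioningDataConf(
      coalesce = true, overwrite = true, coalesceCount = 1,
      partitionColumns = List(GeneralParamConf(1, "date", "{start_date}", "")))

    val load = LoadConf(feeds = Array(LoadFeedConf(
      loadFeedName = "user-events-hive",
      loadType = LoadType.HIVE,
      attributesMap = Map.empty,
      dataPath = DataPath(pathPrefix = Some("/warehouse/user_events")),
      partitioningData = Some(partitioning))))
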
/daflow-commons/src/main/scala/com/abhioncbr/daflow/commons/conf/transform/TransformConf.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.conf.transform
19 |
20 | case class TransformConf(transformSteps: List[TransformStepConf], validateTransformedData: Boolean)
21 | case class TransformStepConf(order: Int, rules: Map[String, TransformRuleConf])
22 | case class TransformRuleConf(ruleType: String, condition: String, ruleAttributesMap: Map[String, String])
23 |
--------------------------------------------------------------------------------
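A sketch of a one-step transform; the "FILTER" rule type and the condition string are assumptions here, since the valid rule types are defined by the XML job configuration rather than by these case classes:

    import com.abhioncbr.daflow.commons.conf.transform.{TransformConf, TransformRuleConf, TransformStepConf}

    val step = TransformStepConf(order = 1, rules = Map(
      "user-events" -> TransformRuleConf(
        ruleType = "FILTER", condition = "score > 0", ruleAttributesMap = Map.empty)))
    val transform = TransformConf(transformSteps = List(step), validateTransformedData = true)
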
/daflow-commons/src/test/scala/com/abhioncbr/daflow/commons/CommonSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons
19 |
20 | import ContextConstantEnum.HADOOP_CONF
21 | import org.apache.hadoop.conf.Configuration
22 | import org.scalatest.BeforeAndAfterEach
23 | import org.scalatest.FlatSpec
24 | import org.scalatest.Matchers
25 |
26 | class CommonSpec extends FlatSpec with Matchers with BeforeAndAfterEach {
27 |
28 | override def beforeEach(): Unit = {
29 | super.beforeEach()
30 |
31 | val dir: String = System.getProperty("user.dir")
32 | System.setProperty("hadoop.home.dir", dir)
33 | Context.addContextualObject[Configuration](HADOOP_CONF, new Configuration())
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/daflow-commons/src/test/scala/com/abhioncbr/daflow/commons/util/FileUtilSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.commons.util
19 |
20 | import com.abhioncbr.daflow.commons.CommonConstants.{DIRECTORY_SEPARATOR => DS}
21 | import com.abhioncbr.daflow.commons.CommonSpec
22 | import com.abhioncbr.daflow.commons.Fixture
23 |
24 | class FileUtilSpec extends CommonSpec {
25 |
26 | "getFilePathString" should "return only pathPrefix as path string" in {
27 | val expectedPath: String = Fixture.pathPrefix + DS
28 | val pathString: String = FileUtil.getFilePathString(Fixture.dataPath)
29 | pathString should not be None
30 | pathString should be(expectedPath)
31 | }
32 |
33 | "getFilePathString" should "return pathPrefix & catalogue as path string" in {
34 | val expectedPath: String = Fixture.pathPrefix + DS + Fixture.catalogueStaticInfixPattern1 + DS
35 | val pathString: String = FileUtil.getFilePathString(Fixture.dataPath1)
36 | pathString should not be None
37 | pathString should be(expectedPath)
38 | }
39 |
40 | "getFilePathString" should "return pathPrefix, catalogue & feed as path string" in {
41 | val expectedPath: String =
42 | Fixture.pathPrefix + DS + Fixture.catalogueStaticInfixPattern1 + DS + Fixture.feedStaticInfixParam + DS
43 | val pathString: String = FileUtil.getFilePathString(Fixture.dataPath2)
44 | pathString should not be None
45 | pathString should be(expectedPath)
46 | }
47 |
48 | "getFilePathString" should "return pathPrefix, catalogue, feed and fileName as path string" in {
49 | val expectedPath: String = Fixture.pathPrefix + DS + Fixture.catalogueStaticInfixPattern1 +
50 | DS + Fixture.feedStaticInfixParam + DS + Fixture.fileNamePrefix1 + "." + Fixture.fileNameSuffix1
51 | val pathString: String = FileUtil.getFilePathString(Fixture.dataPath3)
52 | pathString should not be None
53 | pathString should be(expectedPath)
54 | }
55 |
56 | }
57 |
--------------------------------------------------------------------------------
/daflow-core/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sparsecode/DaFlow/068ec7582ef58cbc6c782d2bc19a43aca2dead81/daflow-core/README.md
--------------------------------------------------------------------------------
/daflow-core/pom.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
  3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  5 |   <parent>
  6 |     <groupId>com.abhioncbr.daflow</groupId>
  7 |     <artifactId>daflow</artifactId>
  8 |     <version>${revision}</version>
  9 |   </parent>
 10 |   <modelVersion>4.0.0</modelVersion>
 11 |
 12 |   <packaging>jar</packaging>
 13 |   <artifactId>daflow-core</artifactId>
 14 |   <version>${daflow.core.version}</version>
 15 |   <name>daflow-core</name>
 16 |
 17 |   <licenses>
 18 |     <license>
 19 |       <name>Apache License, Version 2.0</name>
 20 |       <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
 21 |       <distribution>repo</distribution>
 22 |       <comments>A business-friendly OSS license</comments>
 23 |     </license>
 24 |   </licenses>
 25 |
 26 |   <repositories>
 27 |     <repository>
 28 |       <id>scala-tools.org</id>
 29 |       <name>Scala-Tools Maven2 Repository</name>
 30 |       <url>http://scala-tools.org/repo-releases</url>
 31 |     </repository>
 32 |   </repositories>
 33 |
 34 |   <pluginRepositories>
 35 |     <pluginRepository>
 36 |       <id>scala-tools.org</id>
 37 |       <name>Scala-Tools Maven2 Repository</name>
 38 |       <url>http://scala-tools.org/repo-releases</url>
 39 |     </pluginRepository>
 40 |   </pluginRepositories>
 41 |
 42 |   <properties>
 43 |     <scopt.version>3.3.0</scopt.version>
 44 |   </properties>
 45 |
 46 |   <dependencies>
 47 |     <dependency>
 48 |       <groupId>com.abhioncbr.daflow</groupId>
 49 |       <artifactId>daflow-commons</artifactId>
 50 |       <version>${daflow.common.version}</version>
 51 |     </dependency>
 52 |
 53 |     <dependency>
 54 |       <groupId>com.abhioncbr.daflow</groupId>
 55 |       <artifactId>daflow-job-conf-xml</artifactId>
 56 |       <version>${daflow.job.conf.xml.version}</version>
 57 |     </dependency>
 58 |
 59 |     <dependency>
 60 |       <groupId>com.abhioncbr.daflow</groupId>
 61 |       <artifactId>daflow-metrics</artifactId>
 62 |       <version>${daflow.metrics.version}</version>
 63 |     </dependency>
 64 |
 65 |     <dependency>
 66 |       <groupId>com.abhioncbr.daflow</groupId>
 67 |       <artifactId>daflow-sql-parser</artifactId>
 68 |       <version>${daflow.sql.parser.version}</version>
 69 |     </dependency>
 70 |
 71 |     <dependency>
 72 |       <groupId>com.github.scopt</groupId>
 73 |       <artifactId>scopt_2.11</artifactId>
 74 |       <version>${scopt.version}</version>
 75 |     </dependency>
 76 |   </dependencies>
 77 |
 78 |   <build>
 79 |     <plugins>
 80 |       <plugin>
 81 |         <groupId>org.apache.maven.plugins</groupId>
 82 |         <artifactId>maven-shade-plugin</artifactId>
 83 |         <version>3.2.1</version>
 84 |         <executions>
 85 |           <execution>
 86 |             <phase>package</phase>
 87 |             <goals>
 88 |               <goal>shade</goal>
 89 |             </goals>
 90 |           </execution>
 91 |         </executions>
 92 |       </plugin>
 93 |
 94 |       <plugin>
 95 |         <groupId>org.scala-tools</groupId>
 96 |         <artifactId>maven-scala-plugin</artifactId>
 97 |         <configuration>
 98 |           <scalaVersion>${scala.version}</scalaVersion>
 99 |         </configuration>
100 |       </plugin>
101 |     </plugins>
102 |   </build>
103 | </project>
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/extractData/AbstractExtractData.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.extractData
19 |
20 | abstract class AbstractExtractData extends ExtractData
21 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/extractData/ExtractData.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.extractData
19 |
20 | import com.abhioncbr.daflow.commons.ExecutionResult
21 |
22 | trait ExtractData { def getRawData: Either[ExecutionResult, String] }
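// Note: daflow-core consistently uses Left for success (the ExecutionResult) and Right
// for the error message, the reverse of the usual Scala convention. A minimal
// hypothetical implementation, just to illustrate the contract:
//   class ExtractNothing extends AbstractExtractData {
//     def getRawData: Either[ExecutionResult, String] = Right("nothing to extract")
//   }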
23 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/extractData/ExtractDataFromDB.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.extractData
19 |
20 | import com.abhioncbr.daflow.commons.Context
21 | import com.abhioncbr.daflow.commons.ContextConstantEnum._
22 | import com.abhioncbr.daflow.commons.ExecutionResult
23 | import com.abhioncbr.daflow.commons.NotificationMessages.{exceptionMessage => EM}
24 | import com.abhioncbr.daflow.commons.conf.common.GeneralParamConf
25 | import com.abhioncbr.daflow.commons.conf.common.QueryConf
26 | import com.abhioncbr.daflow.commons.conf.extract.ExtractFeedConf
27 | import com.abhioncbr.daflow.commons.util.FileUtil
28 | import com.typesafe.scalalogging.Logger
29 | import java.io.BufferedReader
30 | import java.io.InputStreamReader
31 | import java.util.Properties
32 | import org.apache.hadoop.conf.Configuration
33 | import org.apache.hadoop.fs.FileSystem
34 | import org.apache.hadoop.fs.Path
35 | import org.apache.spark.sql.DataFrame
36 | import org.apache.spark.sql.SQLContext
37 |
38 | class ExtractDataFromDB(feed: ExtractFeedConf) extends AbstractExtractData {
39 | private val logger = Logger(this.getClass)
40 | val query: Option[QueryConf] = feed.query
41 |
42 | def getRawData: Either[ExecutionResult, String] = {
43 | try {
44 | lazy val fs = FileSystem.get(new Configuration())
45 |
46 | // reading database properties from property file.
47 | val propertyFilePath =
48 | FileUtil.getFilePathString(query.get.queryFile.configurationFile.get)
49 | logger.info(
50 | s"[ExtractDataFromDB]-[getRawData]: DB property file path: $propertyFilePath"
51 | )
52 |
53 | val connectionProps = new Properties()
54 | connectionProps.load(fs.open(new Path(propertyFilePath)))
55 | val dbUri = connectionProps.getProperty("dburi")
56 |
57 | // reading query from the query file.
58 | val sqlQueryFile =
59 | FileUtil.getFilePathString(query.get.queryFile.queryFile.get)
60 | val tableQueryReader = new BufferedReader(
61 | new InputStreamReader(fs.open(new Path(sqlQueryFile)))
62 | )
63 | val rawQuery = Stream
64 | .continually(tableQueryReader.readLine())
65 | .takeWhile(_ != null)
66 | .toArray[String]
67 | .mkString
68 | .stripMargin
69 |
70 | val sqlQueryParams: Array[GeneralParamConf] = query.get.queryArgs.get
71 | val queryParams = ExtractUtil.getParamsValue(sqlQueryParams.toList)
72 |
73 | logger.info(
74 | "[ExtractDataFromDB]-[getRawData]: Query param values: " + queryParams
75 | .mkString(" , ")
76 | )
77 | val tableQuery = String.format(rawQuery, queryParams: _*)
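// The query file is assumed to hold java.lang.String.format-style placeholders
// (e.g. %s) that are substituted, in order, by the configured query parameters.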
78 | logger.info(
79 | s"[ExtractDataFromDB]-[getRawData]: Going to execute jdbc query: \\n $tableQuery"
80 | )
81 |
82 | val sqlContext = Context.getContextualObject[SQLContext](SQL_CONTEXT)
83 | val dataFrame: DataFrame = sqlContext.read.jdbc(
84 | url = dbUri,
85 | table = tableQuery,
86 | properties = connectionProps
87 | )
88 | Left(ExecutionResult(feed.extractFeedName, dataFrame))
89 | } catch {
90 | case exception: Exception =>
91 | logger.error("[ExtractDataFromDB]-[getRawData]: ", exception)
92 | Right(s"[ExtractDataFromDB]-[getRawData]: ${EM(exception)}".stripMargin)
93 | }
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/extractData/ExtractDataFromFileSystem.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.extractData
19 |
20 | import com.abhioncbr.daflow.commons.Context
21 | import com.abhioncbr.daflow.commons.ContextConstantEnum._
22 | import com.abhioncbr.daflow.commons.ExecutionResult
23 | import com.abhioncbr.daflow.commons.NotificationMessages.{exceptionMessage => EM}
24 | import com.abhioncbr.daflow.commons.NotificationMessages.{extractNotSupported => ENS}
25 | import com.abhioncbr.daflow.commons.conf.common.DataPath
26 | import com.abhioncbr.daflow.commons.conf.extract.ExtractFeedConf
27 | import com.abhioncbr.daflow.commons.util.FileUtil
28 | import com.typesafe.scalalogging.Logger
29 | import org.apache.spark.sql.SQLContext
30 |
31 | class ExtractDataFromFileSystem(feed: ExtractFeedConf) extends ExtractData {
32 | private val logger = Logger(this.getClass)
33 | val dataPath: Option[DataPath] = feed.dataPath
34 |
35 | def getRawData: Either[ExecutionResult, String] = {
36 | try {
37 | val sqlContext: SQLContext =
38 | Context.getContextualObject[SQLContext](SQL_CONTEXT)
39 | val fileNamePatternString = FileUtil.getFilePathString(dataPath.get)
40 | logger.info(
41 | s"[ExtractDataFromFileSystem]-[getRawData]: path of data extraction: $fileNamePatternString"
42 | )
43 |
44 | val output: Either[ExecutionResult, String] =
45 | feed.extractionAttributesMap("fileType") match {
46 | case "CSV" =>
47 | Left(
48 | ExecutionResult(
49 | feed.extractFeedName,
50 | sqlContext.read.csv(fileNamePatternString)
51 | )
52 | )
53 | case "JSON" =>
54 | Left(
55 | ExecutionResult(
56 | feed.extractFeedName,
57 | sqlContext.read.json(fileNamePatternString)
58 | )
59 | )
60 | case "PARQUET" =>
61 | Left(
62 | ExecutionResult(
63 | feed.extractFeedName,
64 | sqlContext.read.parquet(fileNamePatternString)
65 | )
66 | )
67 | case _ =>
68 | Right(
69 | s"[ExtractDataFromFileSystem]-[getRawData]: ${ENS(feed.extractionAttributesMap("fileType"))}"
70 | )
71 | }
72 | output
73 | } catch {
74 | case exception: Exception =>
75 | logger.error("[ExtractDataFromFileSystem]-[getRawData]: ", exception)
76 | Right(
77 | s"[ExtractDataFromFileSystem]-[getRawData]: ${EM(exception)}".stripMargin
78 | )
79 | }
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/extractData/ExtractDataFromHive.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.extractData
19 |
20 | import com.abhioncbr.daflow.commons.Context
21 | import com.abhioncbr.daflow.commons.ContextConstantEnum._
22 | import com.abhioncbr.daflow.commons.ExecutionResult
23 | import com.abhioncbr.daflow.commons.NotificationMessages.{exceptionMessage => EM}
24 | import com.abhioncbr.daflow.commons.conf.common.GeneralParamConf
25 | import com.abhioncbr.daflow.commons.conf.common.QueryConf
26 | import com.abhioncbr.daflow.commons.conf.extract.ExtractFeedConf
27 | import com.abhioncbr.daflow.commons.util.FileUtil
28 | import com.typesafe.scalalogging.Logger
29 | import java.io.BufferedReader
30 | import java.io.InputStreamReader
31 | import org.apache.hadoop.conf.Configuration
32 | import org.apache.hadoop.fs.FileSystem
33 | import org.apache.hadoop.fs.Path
34 | import org.apache.spark.sql.DataFrame
35 | import org.apache.spark.sql.SQLContext
36 |
37 | class ExtractDataFromHive(feed: ExtractFeedConf) extends AbstractExtractData {
38 | private val logger = Logger(this.getClass)
39 | val query: Option[QueryConf] = feed.query
40 |
41 | def getRawData: Either[ExecutionResult, String] = {
42 | try {
43 | lazy val fs = FileSystem.get(new Configuration())
44 |
45 | // reading query from the query file.
46 | val sqlQueryFile: String =
47 | FileUtil.getFilePathString(query.get.queryFile.queryFile.get)
48 | val tableQueryReader = new BufferedReader(
49 | new InputStreamReader(fs.open(new Path(sqlQueryFile)))
50 | )
51 | val rawQuery = Stream
52 | .continually(tableQueryReader.readLine())
53 | .takeWhile(_ != null)
54 | .toArray[String]
55 | .mkString
56 | .stripMargin
57 |
58 | val sqlQueryParams: Array[GeneralParamConf] = query.get.queryArgs.get
59 | val queryParams = ExtractUtil.getParamsValue(sqlQueryParams.toList)
60 | logger.info(
61 | "[ExtractDataFromHive]-[getRawData]: Qquery param values" + queryParams
62 | .mkString(" , ")
63 | )
64 | val tableQuery = String.format(rawQuery, queryParams: _*)
65 | logger.info(
66 | s"[ExtractDataFromHive]-[getRawData]: Going to execute hive query: \\n $tableQuery"
67 | )
68 |
69 | val sqlContext = Context.getContextualObject[SQLContext](SQL_CONTEXT)
70 | val dataFrame: DataFrame = sqlContext.sql(tableQuery)
71 | Left(ExecutionResult(feed.extractFeedName, dataFrame))
72 | } catch {
73 | case exception: Exception =>
74 | logger.error("[ExtractDataFromHive]-[getRawData]: ", exception)
75 | Right(
76 | s"[ExtractDataFromHive]-[getRawData]: ${EM(exception)}".stripMargin
77 | )
78 | }
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/extractData/ExtractUtil.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.extractData
19 |
20 | import com.abhioncbr.daflow.commons.conf.common.GeneralParamConf
21 | import com.abhioncbr.daflow.commons.util.FileUtil
22 |
23 | object ExtractUtil {
24 | def getParamsValue(paramList: List[GeneralParamConf]): Array[Object] = {
25 | paramList
26 | .map(
27 | queryParam =>
28 | (queryParam.order, FileUtil.mapFormatArgs(Some(paramList.toArray)))
29 | )
30 | .sortBy(_._1)
31 | .map(_._2)
32 | .toArray
33 | }
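// Hypothetical usage sketch (values are illustrative): with
//   List(GeneralParamConf(2, "end", "2019-01-31", ""), GeneralParamConf(1, "start", "2019-01-01", ""))
// the intent is to produce the format arguments sorted by their 'order' field,
// ready to be spliced into String.format(rawQuery, args: _*).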
34 | }
35 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/loadData/LoadData.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.loadData
19 |
20 | import org.apache.spark.sql.DataFrame
21 | import org.joda.time.DateTime
22 |
23 | trait LoadData{
24 | def loadTransformedData(dataFrame: DataFrame, date: Option[DateTime]): Either[Boolean, String]
25 | }
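// As in ExtractData, Left signals success (Left(true)) and Right carries the error message.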
26 |
27 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/loadData/LoadDataIntoFileSystem.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.loadData
19 |
20 | import com.abhioncbr.daflow.commons.Context
21 | import com.abhioncbr.daflow.commons.ContextConstantEnum.JOB_STATIC_PARAM_CONF
22 | import com.abhioncbr.daflow.commons.conf.JobStaticParamConf
23 | import com.abhioncbr.daflow.commons.conf.common.DataPath
24 | import com.abhioncbr.daflow.commons.conf.load.LoadFeedConf
25 | import com.abhioncbr.daflow.commons.util.FileUtil
26 | import com.typesafe.scalalogging.Logger
27 | import org.apache.spark.sql.DataFrame
28 | import org.apache.spark.sql.SaveMode
29 | import org.joda.time.DateTime
30 |
31 | class LoadDataIntoFileSystem(feed: LoadFeedConf) extends LoadData {
32 | private val logger = Logger(this.getClass)
33 | private val processFrequency = Context
34 | .getContextualObject[JobStaticParamConf](JOB_STATIC_PARAM_CONF)
35 | .processFrequency
36 |
37 | private val datasetName: String =
38 | feed.attributesMap.getOrElse("catalogName", "")
39 | private val feedName = feed.attributesMap.getOrElse("feedName", "")
40 | private val dataPath: DataPath = feed.dataPath
41 |
42 | def loadTransformedData(
43 | dataFrame: DataFrame,
44 | date: Option[DateTime] = None
45 | ): Either[Boolean, String] = {
46 | val path = FileUtil.getFilePathString(dataPath)
47 |
48 | try {
49 | logger.info(
50 | s"Writing $processFrequency dataFrame for dataset: $datasetName, feed $feedName to ($path). " +
51 | s"Total number of data rows saved: ${dataFrame.count}"
52 | )
53 |
54 | val fileType = feed.attributesMap("fileType")
55 |
56 | val output: Either[Boolean, String] = fileType match {
57 | case "CSV" =>
58 | dataFrame.write.mode(SaveMode.Overwrite).csv(path)
59 | logger.info(s"Data written at ($path) successfully.")
60 | Left(true)
61 |
62 | case "JSON" =>
63 | dataFrame.write.mode(SaveMode.Overwrite).json(path)
64 | logger.info(s"Data written at ($path) successfully.")
65 | Left(true)
66 |
67 | case "PARQUET" =>
68 | dataFrame.write.mode(SaveMode.Overwrite).parquet(path)
69 | logger.info(s"Data written at ($path) successfully.")
70 | Left(true)
71 |
72 | case _ => Right(s"file type '$fileType' not supported.")
73 | }
74 |
75 | output
76 | }
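// Note: the try block above has no catch clause, so write failures propagate to the
// caller instead of being mapped to a Right(...) error message.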
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/loadData/LoadUtil.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.loadData
19 |
20 | import com.abhioncbr.daflow.commons.conf.load.PartitioningDataConf
21 |
22 | object LoadUtil {
23 | def getPartitioningString(data: PartitioningDataConf): String = {
24 | data.partitionColumns.map(col => s"${col.paramName} = '${col.paramValue}'").mkString(" , ")
25 | }
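// Worked example (hypothetical values):
//   partitionColumns = List(GeneralParamConf(1, "year", "2019", ""),
//                           GeneralParamConf(2, "month", "08", ""))
// produces the string: year = '2019' , month = '08'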
26 | }
27 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/transformData/Transform.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.transformData
19 |
20 | case class Transform(transformSteps: List[TransformStep], validateTransformedData: Boolean)
21 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/transformData/TransformData.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.transformData
19 |
20 | import com.abhioncbr.daflow.commons.ExecutionResult
21 | import com.typesafe.scalalogging.Logger
22 |
23 | class TransformData(transform : Transform) {
24 | private val logger = Logger(this.getClass)
25 |
26 | val test: (Either[Array[ExecutionResult], String], TransformStep) => Either[Array[ExecutionResult], String] = (input, step) => {
27 | input match {
28 | case Left(array) =>
29 | step.addInputData(array.map(res => res.resultDF)) match {
30 | case None =>
31 |
32 | // val stepOutput: ArrayBuffer[ExecutionResult] = new ArrayBuffer()
33 | val stepOutput: List[Either[Array[ExecutionResult], String]] = step.getRules.zipWithIndex.map(rule => {
34 | logger.info(s"step order: ${step.getOrder}, rule: $rule - checking condition")
35 | if (rule._1._2.condition(step.getInputData)) {
36 | logger.info(s"step order: ${step.getOrder}, rule: $rule - executing")
37 | rule._1._2.execute(step.getInputData) match {
38 | case Left(outputArray) => Left(outputArray)
39 | case Right(s) => Right(s)
40 | }
41 | } else {
42 | Right(s"For transformation step order: ${step.getOrder}, rule group:${rule._1._2.getGroup} : condition failed.")
43 | }
44 | }).toList
45 |
46 | val filteredStepOutput = stepOutput.filter(_.isRight)
47 | if(filteredStepOutput.nonEmpty) { Right(filteredStepOutput.mkString(" \n ")) }
48 | else { Left(stepOutput.flatMap(_.left.get).toArray) }
49 |
50 | case Some(s) => Right(s)
51 | }
52 |
53 | case Right(e) => Right(e)
54 | }
55 | }
56 |
57 | def performTransformation(extractResult: Array[ExecutionResult]): Either[Array[ExecutionResult], String] = {
58 | val steps = transform.transformSteps
59 | val stepOutput: Either[Array[ExecutionResult], String] = Left(extractResult)
60 |
61 | val output: Either[Array[ExecutionResult], String] = steps.foldLeft(stepOutput)((c, n) => test(c, n))
62 | output
63 | }
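// The fold threads the Either through every step: a Left(results) becomes the next
// step's input, while the first Right(error) short-circuits the remaining steps,
// since `test` passes Right values through unchanged.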
64 | }
65 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/transformData/TransformStep.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.transformData
19 |
20 | import org.apache.spark.sql.DataFrame
21 |
22 | class TransformStep(order: Int, rules: Map[String, TransformRule]){
23 | override def toString: String = s" step order: $order, step rules: $rules"
24 | def getOrder: Int = order
25 | def getRules: Map[String, TransformRule] = rules
26 |
27 | val inputData: scala.collection.mutable.Map[String, DataFrame] = scala.collection.mutable.Map[String, DataFrame]()
28 | def getInputData(i: String): DataFrame = inputData(i)
29 |
30 | lazy val requiredDF: Array[String] = rules.values.flatMap {
31 | case merge: MergeRule =>
32 | val temp = merge.getMergeGroup
33 | Array(temp._1, temp._2)
34 | case default: Any => Array(default.getGroup)
35 | }.toArray
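// requiredDF names the input DataFrames this step consumes: a merge rule contributes
// both of its merge-group names, any other rule contributes its own group name.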
36 |
37 | def addInputData(dataArray: Array[DataFrame]) : Option[String] = {
38 | if (dataArray.length == requiredDF.length) {
39 | inputData.clear
40 | inputData ++= requiredDF.zip(dataArray).toMap
41 | None
42 | } else {
43 | Some(s"For transformation step ${this.getOrder}: input data frames size(${dataArray.length}) " +
44 | s"is not equal to rules map size(${rules.size})")
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/validateData/ValidateData.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.validateData
19 |
20 | import com.abhioncbr.daflow.commons.ExecutionResult
21 | import org.apache.spark.sql.types.StructType
22 |
23 | trait ValidateData{
24 | def validateSchema(input: ExecutionResult) : (Boolean, Option[StructType], Option[StructType])
25 | def validateData(input: ExecutionResult, structType: StructType, first: Any, second: Any): Array[ExecutionResult]
26 | }
27 |
--------------------------------------------------------------------------------
/daflow-core/src/main/scala/com/abhioncbr/daflow/core/validateData/ValidateTransformedData.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.core.validateData
19 |
20 | abstract class ValidateTransformedData extends ValidateData
21 | /* private val logger = Logger(this.getClass)
22 | private val sparkContext: SparkContext = Context.getContextualObject[SparkContext](SPARK_CONTEXT)
23 | private val sqlContext: SQLContext = Context.getContextualObject[SQLContext](SQL_CONTEXT)
24 |
25 | private val tableName = Context.getContextualObject[LoadFeedConf](LOAD_CONF).attributesMap("tableName")
26 | private val databaseName = Context.getContextualObject[LoadFeedConf](LOAD_CONF).attributesMap("databaseName")
27 | val partitionColumns: List[String] = Context.getContextualObject[LoadFeedConf](LOAD_CONF).partitioningData.get.
28 | partitionColumns.map(column => column.paramName)
29 |
30 | def validateSchema(dataFrame: DataFrame): (Boolean, Option[StructType], Option[StructType]) = {
31 | logger.info("Validating data frame schema and hive table schema")
32 |
33 | val dataFrameSchema = dataFrame.schema
34 |
35 | var tableSchema = Context.getContextualObject[(Option[StructType], Option[StructType])](SCHEMA)
36 | if(tableSchema == null)
37 | tableSchema = TransformUtil.tableMetadata(tableName, databaseName, sqlContext, partitionColumns)
38 |
39 | val output = if(tableSchema._1.isDefined) tableSchema._1.get == dataFrameSchema else false
40 | (output, tableSchema._1, Some(dataFrameSchema))
41 | }
42 |
43 | def validateData(dataFrame: DataFrame, structType: StructType, first: Any, second: Any):
44 | Array[(DataFrame, DataFrame, Any, Any)] ={
45 | logger.info("Validating data frame row schema and hive table schema")
46 |
47 | //val temp1 = dataFrame.collect
48 | //val temp = temp1.partition(row => compareSchema( row, structType))
49 | //val validatedRdd = sparkContext.parallelize(temp._1)
50 | val validatedDataFrame = sqlContext.createDataFrame(dataFrame.rdd.filter(_.schema == structType), structType)
51 |
52 | //val nonValidatedRdd = sparkContext.parallelize(temp._2)
53 | val nonValidatedDataFrame = sqlContext.createDataFrame(dataFrame.rdd.filter(_.schema != structType), structType)
54 |
55 | Array((validatedDataFrame,nonValidatedDataFrame, first, second))
56 | }
57 |
58 | def compareSchema(row: Row, structType: StructType): Boolean = {
59 | try{ row.schema == structType }
60 | catch { case e: Throwable => println(row.mkString); false }
61 | } */
62 |
--------------------------------------------------------------------------------
/daflow-examples/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sparsecode/DaFlow/068ec7582ef58cbc6c782d2bc19a43aca2dead81/daflow-examples/README.md
--------------------------------------------------------------------------------
/daflow-examples/daflow-xml-templates/extract_jdbc_import.xml:
--------------------------------------------------------------------------------
[XML template: markup lost in extraction. Surviving values: {sql-query-file-path.sql}, {db-property-file-path}, test_string2, {partition-file-path-initial}]
--------------------------------------------------------------------------------
/daflow-examples/daflow-xml-templates/extract_json_import.xml:
--------------------------------------------------------------------------------
[XML template: markup lost in extraction. Surviving values: {json-file-path-suffix}, json_data, json, field lists "{col1}, {col2}, {col3}" (twice), {partition-file-path-initial}]
--------------------------------------------------------------------------------
/daflow-examples/daflow-xml-templates/multiple_group_name.xml:
--------------------------------------------------------------------------------
[XML template: markup lost in extraction. Surviving values: {json-file-path-suffix}, json_data, json, conditions "{col1} is not null" and "{cond2}", records.value, {path}]
--------------------------------------------------------------------------------
/daflow-examples/daflow-xml-templates/multiple_transform_rule.xml:
--------------------------------------------------------------------------------
[XML template: markup lost in extraction. Surviving values: {json-file-path-suffix}, order 1, name patterns group_%s and feed_%s (formatting enabled: true), json_data, json, condition "{col1} like 'my%'", field lists "{col2}, {col3}, {col4}, {col5}" (twice), {path}]
--------------------------------------------------------------------------------
/daflow-examples/demo/daflow-job-xml/json_etl_example.xml:
--------------------------------------------------------------------------------
[XML job definition: markup lost in extraction. Surviving values: input path daflow-examples/demo/sample-data, json_data, json, field mappings test_string1,test_string2 and test_string2, output path daflow-examples/demo/sample-data/daflow-result/]
--------------------------------------------------------------------------------
/daflow-examples/demo/sample-data/json_data.json:
--------------------------------------------------------------------------------
1 | {"test_string1":"1","test_string2":"2"}
2 | {"test_string2":"22","test_string3":"3"}
--------------------------------------------------------------------------------
/daflow-examples/scripts/execute_etl_feed.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | spark-submit \
5 | --class com.abhioncbr.daflow.core.LaunchDaFlowSparkJobExecution \
6 | daflow-examples/demo/artifacts/daflow-core-0.1-SNAPSHOT.jar \
7 | -j example -c daflow-examples/demo/daflow-job-xml/json_etl_example.xml
--------------------------------------------------------------------------------
/daflow-examples/scripts/execute_etl_feed_airflow.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | {{ params.conf['spark_home'] }}/spark-submit --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
3 | --conf spark.sql.hive.convertMetastoreParquet=false \
4 | --conf spark.yarn.executor.memoryOverhead={{ params.conf['memoryOverhead'] }} \
5 | --conf spark.memory.useLegacyMode=true \
6 | --conf spark.shuffle.memoryFraction=0.5 \
7 | --conf spark.storage.memoryFraction=0.5 \
8 | --conf spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec \
9 | --master yarn-client --queue {{ params.conf['queue'] }} \
10 | --num-executors {{ params.conf['num_executors'] }} --driver-memory {{ params.conf['driver_memory'] }} --executor-cores {{ params.conf['executor_cores'] }} \
11 | --executor-memory {{ params.conf['executor_memory'] }} \
12 | --class {{ params.conf['entry_class'] }} \
13 | {{params.conf['app'] }} \
14 | --date "{{ execution_date }}" {{params.arg_string }}
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sparsecode/DaFlow/068ec7582ef58cbc6c782d2bc19a43aca2dead81/daflow-job-conf/daflow-job-conf-xml/README.md
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <parent>
6 |     <groupId>com.abhioncbr.daflow</groupId>
7 |     <artifactId>daflow</artifactId>
8 |     <version>${revision}</version>
9 |     <relativePath>../../pom.xml</relativePath>
10 |   </parent>
11 |   <modelVersion>4.0.0</modelVersion>
12 |
13 |   <packaging>jar</packaging>
14 |   <artifactId>daflow-job-conf-xml</artifactId>
15 |   <name>daflow-job-conf-xml</name>
16 |   <version>${daflow.job.conf.xml.version}</version>
17 |
18 |   <licenses>
19 |     <license>
20 |       <name>Apache License, Version 2.0</name>
21 |       <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
22 |       <distribution>repo</distribution>
23 |       <comments>A business-friendly OSS license</comments>
24 |     </license>
25 |   </licenses>
26 |
27 |   <repositories>
28 |     <repository>
29 |       <id>scala-tools.org</id>
30 |       <name>Scala-Tools Maven2 Repository</name>
31 |       <url>http://scala-tools.org/repo-releases</url>
32 |     </repository>
33 |   </repositories>
34 |
35 |   <pluginRepositories>
36 |     <pluginRepository>
37 |       <id>scala-tools.org</id>
38 |       <name>Scala-Tools Maven2 Repository</name>
39 |       <url>http://scala-tools.org/repo-releases</url>
40 |     </pluginRepository>
41 |   </pluginRepositories>
42 |
43 |   <properties>
44 |     <main.basedir>${project.parent.basedir}</main.basedir>
45 |   </properties>
46 |
47 |   <dependencies>
48 |     <dependency>
49 |       <groupId>com.abhioncbr.daflow</groupId>
50 |       <artifactId>daflow-commons</artifactId>
51 |       <version>${daflow.common.version}</version>
52 |       <type>test-jar</type>
53 |       <scope>test</scope>
54 |     </dependency>
55 |
56 |     <dependency>
57 |       <groupId>com.abhioncbr.daflow</groupId>
58 |       <artifactId>daflow-commons</artifactId>
59 |       <version>${daflow.common.version}</version>
60 |     </dependency>
61 |   </dependencies>
62 |
63 |   <build>
64 |     <plugins>
65 |       <plugin>
66 |         <groupId>org.scalatest</groupId>
67 |         <artifactId>scalatest-maven-plugin</artifactId>
68 |         <configuration>
69 |           <systemProperties>
70 |             <main.basedir>${main.basedir}</main.basedir>
71 |           </systemProperties>
72 |         </configuration>
73 |         <executions>
74 |           <execution>
75 |             <id>test</id>
76 |             <goals>
77 |               <goal>test</goal>
78 |             </goals>
79 |           </execution>
80 |         </executions>
81 |       </plugin>
82 |
83 |       <plugin>
84 |         <groupId>org.scala-tools</groupId>
85 |         <artifactId>maven-scala-plugin</artifactId>
86 |         <configuration>
87 |           <scalaVersion>${scala.version}</scalaVersion>
88 |         </configuration>
89 |       </plugin>
90 |     </plugins>
91 |   </build>
92 | </project>
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/AttributeTags.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | object AttributeTags {
21 | val NAME: String = "@name"
22 | val TYPE: String = "@type"
23 | val VALUE: String = "@value"
24 | val ORDER: String = "@order"
25 | val JOB_NAME: String = "@jobName"
26 | val FREQUENCY: String = "@frequency"
27 | val FEED_NAME: String = "@feedName"
28 | val SOURCE_NAME: String = "@sourceName"
29 | val TARGET_NAME: String = "@targetName"
30 | val DEFAULT_VALUE: String = "@defaultValue"
31 | val PUBLISH_STATS: String = "@publishStats"
32 | val COALESCE_PARTITION: String = "@coalescePartition"
33 | val OVERWRITE_PARTITION: String = "@overwritePartition"
34 | val VALIDATE_EXTRACTED_DATA: String = "@validateExtractedData"
35 | val COALESCE_PARTITION_COUNT: String = "@coalescePartitionCount"
36 | val VALIDATE_TRANSFORMED_DATA: String = "@validateTransformedData"
37 | }
38 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/NodeTags.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | object NodeTags {
21 | // Four root node tags
22 | val LOAD: String = "load"
23 | val EXTRACT: String = "extract"
24 | val TRANSFORM: String = "transform"
25 | val JOB_STATIC_PARAM: String = "jobStaticParam"
26 |
27 | // General node tags
28 | val FEED: String = "feed"
29 | val DATA_PATH: String = "dataPath"
30 |
31 | // Extract node tags
32 | val JDBC: String = "jdbc"
33 | val QUERY: String = "query"
34 | val FILE_SYSTEM: String = "fileSystem"
35 | val QUERY_PARAMS: String = "queryParams"
36 | val SQL_QUERY_FILE: String = "sqlQueryFile"
37 | val CONFIGURATION_FILE: String = "configurationFile"
38 |
39 | // Transform node tags
40 | val RULE: String = "rule"
41 | val STEP: String = "step"
42 | val GROUP: String = "group"
43 | val CONDITION: String = "condition"
44 |
45 | // Load node tags
46 | val HIVE: String = "hive"
47 | val COLUMN: String = "column"
48 | val PARTITION_DATA: String = "partitionData"
49 | val PARTITION_COLUMNS: String = "partitionColumns"
50 |
51 | // Other node tags
52 | val PARAM: String = "param"
53 | val OTHER_PARAMS: String = "otherParams"
54 | val FIELD_MAPPING: String = "fieldMapping"
55 |
56 | // Data path node tags
57 | val PATH: String = "path"
58 | val ORDER: String = "order"
59 | val MEMBER: String = "member"
60 | val PREFIX: String = "prefix"
61 | val SUFFIX: String = "suffix"
62 | val FILE_NAME: String = "fileName"
63 | val SEPARATOR: String = "separator"
64 | val PATH_PATTERN: String = "pathPattern"
65 | val FEED_PATTERN: String = "feedPattern"
66 | val INITIAL_PATH: String = "initialPath"
67 | val GROUP_PATTERN: String = "groupPattern"
68 | val FORMAT_FEED_NAME: String = "formatFeedName"
69 | val FEED_NAME_PATTERN: String = "feedNamePattern"
70 | val FORMAT_ARG_VALUES: String = "formatArgValues"
71 | val FORMAT_GROUP_NAME: String = "formatGroupName"
72 | val GROUP_NAME_PATTERN: String = "groupNamePattern"
73 | }
74 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/ParseDaFlowJobXml.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.Context
21 | import com.abhioncbr.daflow.commons.ContextConstantEnum.HADOOP_CONF
22 | import com.abhioncbr.daflow.commons.NotificationMessages.{exceptionMessage => EM}
23 | import com.abhioncbr.daflow.commons.NotificationMessages.{unknownXMLEntity => UE}
24 | import com.abhioncbr.daflow.commons.NotificationMessages.{jobXmlFileDoesNotExist => JXF}
25 | import com.abhioncbr.daflow.commons.NotificationMessages.{exceptionWhileParsing => EWP}
26 | import com.abhioncbr.daflow.commons.conf.DaFlowJobConf
27 | import com.abhioncbr.daflow.commons.conf.JobStaticParamConf
28 | import com.abhioncbr.daflow.commons.conf.extract.ExtractConf
29 | import com.abhioncbr.daflow.commons.conf.load.LoadConf
30 | import com.abhioncbr.daflow.commons.conf.transform.TransformConf
31 | import com.abhioncbr.daflow.job.conf.xml.NodeTags._
32 | import java.io._
33 | import javax.xml.XMLConstants
34 | import javax.xml.transform.stream.StreamSource
35 | import javax.xml.validation.SchemaFactory
36 | import org.apache.hadoop.conf.Configuration
37 | import org.apache.hadoop.fs.FileSystem
38 | import org.apache.hadoop.fs.Path
39 | import scala.util.Try
40 | import scala.xml.Node
41 |
42 | object DaFlowJob{
43 | def fromXML(node: scala.xml.NodeSeq): DaFlowJobConf = {
44 | DaFlowJobConf(ParseJobStaticParam.fromXML(node \ JOB_STATIC_PARAM),
45 | ParseExtract.fromXML(node \ EXTRACT),
46 | ParseTransform.fromXML(node \ TRANSFORM),
47 | ParseLoad.fromXML(node \ LOAD))
48 | }
49 | }
50 |
51 | class ParseDaFlowJobXml {
52 | def parseXml(path: String, loadFromHDFS: Boolean): Either[String, String] = {
53 | try {
54 | val reader: BufferedReader = if (loadFromHDFS) {
55 | val fs = FileSystem.get(Context.getContextualObject[Configuration](HADOOP_CONF))
56 | new BufferedReader(new InputStreamReader(fs.open(new Path(path))))
57 | } else { new BufferedReader(new InputStreamReader(new FileInputStream(path))) }
58 |
59 | val lines = Stream.continually(reader.readLine()).takeWhile(_ != null).toArray[String].mkString
60 | reader.close()
61 | Left(lines)
62 | } catch {
63 | case fileNotFoundException: FileNotFoundException => Right(s"${JXF(path)}. ${EM(fileNotFoundException)}".stripMargin)
64 | case exception: Exception => Right(s"$EWP ${EM(exception)}".stripMargin)
65 | }
66 | }
67 |
68 | def validateXml(xsdFile: String, xmlFile: String): Boolean = {
69 | Try({
70 | val factory: SchemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
71 | val schema = factory.newSchema(new StreamSource(new FileInputStream(xsdFile)))
72 | schema.newValidator().validate(new StreamSource(new FileInputStream(xmlFile)))
73 | true
74 | }).getOrElse(false)
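// Note: Try(...).getOrElse(false) swallows any validator exception, so a missing file
// and an invalid document are indistinguishable here; both simply yield false.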
75 | }
76 |
77 | def parseNode(node: scala.xml.Node): Either[(JobStaticParamConf, ExtractConf, TransformConf, LoadConf), String] = {
78 | val trimmedNode: Node = scala.xml.Utility.trim(node)
79 | trimmedNode match {
80 | case {_*} => val daFlowJob = DaFlowJob.fromXML(trimmedNode)
81 | Left((daFlowJob.jobStaticParam, daFlowJob.extract, daFlowJob.transform, daFlowJob.load))
82 | case _ => Right(UE)
83 | }
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/ParseExtract.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.conf.common.DataPath
21 | import com.abhioncbr.daflow.commons.conf.common.QueryConf
22 | import com.abhioncbr.daflow.commons.conf.extract.ExtractConf
23 | import com.abhioncbr.daflow.commons.conf.extract.ExtractFeedConf
24 | import com.abhioncbr.daflow.commons.conf.extract.ExtractionType
25 | import com.abhioncbr.daflow.job.conf.xml.AttributeTags._
26 | import com.abhioncbr.daflow.job.conf.xml.NodeTags._
27 |
28 | object ParseExtract {
29 | def fromXML(node: scala.xml.NodeSeq): ExtractConf = {
30 | val extract: ExtractConf =
31 | ExtractConf(feeds = Array[ExtractFeedConf]((node \ FEED).toList map { s => ParseExtractFeed.fromXML(s) }: _*))
32 | extract
33 | }
34 | }
35 |
36 | object ParseExtractFeed {
37 | def fromXML(node: scala.xml.NodeSeq): ExtractFeedConf = {
38 | val feedName: String = (node \ FEED_NAME).text
39 | val validateExtractedData: Boolean = ParseUtil.parseBoolean((node \ VALIDATE_EXTRACTED_DATA).text)
40 |
41 | val extractionType: ExtractionType.valueType =
42 | ExtractionType.getValueType(valueTypeString = (node \ "_").head.label.toUpperCase)
43 |
44 | val attributesMap: Map[String, String] =
45 | (node \ "_").head.attributes.map(meta => (meta.key, meta.value.toString)).toMap
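// The first child element of the feed node determines the extraction type (its label
// is matched against ExtractionType, e.g. jdbc or fileSystem per NodeTags), and that
// element's XML attributes become extractionAttributesMap.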
46 |
47 | val query: Option[QueryConf] = ParseUtil.parseNode[QueryConf](node \ JDBC \ QUERY, None, ParseQuery.fromXML)
48 | val dataPath: Option[DataPath] = ParseUtil.parseNode[DataPath](node \ FILE_SYSTEM \ DATA_PATH, None, ParseDataPath.fromXML)
49 |
50 | val feed: ExtractFeedConf = ExtractFeedConf(extractFeedName = feedName,
51 | extractionType = extractionType, extractionAttributesMap = attributesMap,
52 | dataPath = dataPath, query = query, validateExtractedData = validateExtractedData)
53 | feed
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/ParseFieldMapping.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.conf.common.FieldMappingConf
21 | import com.abhioncbr.daflow.job.conf.xml.AttributeTags._
22 | import com.abhioncbr.daflow.job.conf.xml.NodeTags._
23 |
24 | object ParseFieldMappings {
25 | def fromXML(node: scala.xml.NodeSeq): List[FieldMappingConf] = {
26 | List[FieldMappingConf]((node \ FIELD_MAPPING).toList map { s => ParseFieldMapping.fromXML(s) }: _*)
27 | }
28 | }
29 |
30 | object ParseFieldMapping {
31 | def fromXML(node: scala.xml.NodeSeq): FieldMappingConf = {
32 | FieldMappingConf(sourceFieldName = (node \ SOURCE_NAME).text,
33 | targetFieldName = (node \ TARGET_NAME).text)
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/ParseGeneralParams.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.conf.common.GeneralParamConf
21 | import com.abhioncbr.daflow.job.conf.xml.AttributeTags._
22 | import com.abhioncbr.daflow.job.conf.xml.NodeTags._
23 |
24 | object ParseGeneralParams {
25 | def fromXML(node: scala.xml.NodeSeq): Array[GeneralParamConf] = {
26 | Array[GeneralParamConf]((node \ PARAM).toList map { s => ParseGeneralParam.fromXML(s) }: _*)
27 | }
28 | }
29 |
30 | object ParseGeneralParam {
31 | def fromXML(node: scala.xml.NodeSeq): GeneralParamConf = {
32 | val order = ParseUtil.parseInt((node \ AttributeTags.ORDER).text)
33 | val paramName = (node \ NAME).text
34 | val paramValue = (node \ VALUE).text
35 | val paramDefaultValue = (node \ DEFAULT_VALUE).text
36 | GeneralParamConf(order, paramName, paramValue, paramDefaultValue)
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/ParseJobStaticParam.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.ProcessFrequencyEnum
21 | import com.abhioncbr.daflow.commons.conf.JobStaticParamConf
22 | import com.abhioncbr.daflow.commons.conf.common.GeneralParamConf
23 | import com.abhioncbr.daflow.job.conf.xml.AttributeTags._
24 | import com.abhioncbr.daflow.job.conf.xml.NodeTags._
25 |
26 | object ParseJobStaticParam {
27 | def fromXML(node: scala.xml.NodeSeq): JobStaticParamConf = {
28 | JobStaticParamConf(processFrequency = ProcessFrequencyEnum.getProcessFrequencyEnum((node \ FREQUENCY).text),
29 | jobName = (node \ JOB_NAME).text,
30 | publishStats = ParseUtil.parseBoolean((node \ PUBLISH_STATS).text),
31 | otherParams = ParseUtil.parseNode[Array[GeneralParamConf]](node \ OTHER_PARAMS, None, ParseGeneralParams.fromXML)
32 | )
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/ParseLoad.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.conf.common.DataPath
21 | import com.abhioncbr.daflow.commons.conf.load.LoadConf
22 | import com.abhioncbr.daflow.commons.conf.load.LoadFeedConf
23 | import com.abhioncbr.daflow.commons.conf.load.LoadType
24 | import com.abhioncbr.daflow.commons.conf.load.PartitioningDataConf
25 | import com.abhioncbr.daflow.job.conf.xml.AttributeTags._
26 | import com.abhioncbr.daflow.job.conf.xml.NodeTags._
27 |
28 | object ParseLoad {
29 | def fromXML(node: scala.xml.NodeSeq): LoadConf = {
30 | val load: LoadConf = LoadConf(feeds =
31 | Array[LoadFeedConf]((node \ FEED).toList map { s => ParseLoadFeed.fromXML(s) }: _*))
32 | load
33 | }
34 | }
35 |
36 | object ParseLoadFeed {
37 | def fromXML(node: scala.xml.NodeSeq): LoadFeedConf = {
38 | val loadFeedName: String = (node \ NAME).text
39 | val loadType: LoadType.valueType =
40 | LoadType.getValueType(valueTypeString = (node \ "_").head.label.toUpperCase)
41 |
42 | val attributesMap: Map[String, String] = (node \ "_").head.attributes.map(meta => (meta.key, meta.value.toString)).toMap
43 | val dataPath: DataPath = ParseUtil.parseNode[DataPath](node \ "_" \ DATA_PATH, None, ParseDataPath.fromXML).orNull
44 | val partitioningData: Option[PartitioningDataConf] =
45 | ParseUtil.parseNode[PartitioningDataConf](node \ HIVE \ PARTITION_DATA, None, ParsePartitioningData.fromXML)
46 |
47 | val feed: LoadFeedConf = LoadFeedConf(loadFeedName = loadFeedName,
48 | loadType = loadType, attributesMap = attributesMap, dataPath = dataPath, partitioningData = partitioningData)
49 |
50 | feed
51 | }
52 | }
53 |
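ParseLoadFeed derives the load type from the label of the feed's first child element and turns that element's attributes into attributesMap. A scala-xml sketch of that mechanism (the feed XML below is illustrative):

    import scala.xml.XML

    val feed = XML.loadString("""<feed name="f1"><hive overwrite="true"/></feed>""")
    (feed \ "_").head.label.toUpperCase                                     // "HIVE", fed to LoadType.getValueType
    (feed \ "_").head.attributes.map(m => (m.key, m.value.toString)).toMap  // Map("overwrite" -> "true")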
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/ParsePartitioningData.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.conf.common.GeneralParamConf
21 | import com.abhioncbr.daflow.commons.conf.load.PartitioningDataConf
22 | import com.abhioncbr.daflow.job.conf.xml.AttributeTags._
23 | import com.abhioncbr.daflow.job.conf.xml.NodeTags._
24 |
25 | object ParsePartitioningData {
26 | def fromXML(node: scala.xml.NodeSeq): PartitioningDataConf = {
27 | val coalesce = ParseUtil.parseBoolean((node \ COALESCE_PARTITION).text)
28 | val overwrite = ParseUtil.parseBoolean((node \ OVERWRITE_PARTITION).text)
29 | val coalesceCount = ParseUtil.parseInt((node \ COALESCE_PARTITION_COUNT).text)
30 | val partitionColumns = List[GeneralParamConf]((node \ PARTITION_COLUMNS \ COLUMN).
31 | toList map { s => ParseGeneralParam.fromXML(s) }: _*)
32 |
33 | PartitioningDataConf(coalesce = coalesce, overwrite = overwrite,
34 | coalesceCount = coalesceCount, partitionColumns = partitionColumns)
35 | }
36 | }
37 |
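ParsePartitioningData falls back to false for missing flags and -1 for a missing count (the Try defaults in ParseUtil). A hand-built equivalent of a fully specified node, using only constructors shown in this document (values illustrative):

    // GeneralParamConf(order, paramName, paramValue, paramDefaultValue), as built by ParseGeneralParam.
    val pd = PartitioningDataConf(coalesce = true, overwrite = true, coalesceCount = 10,
      partitionColumns = List(GeneralParamConf(1, "Date", "date", "")))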
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/ParseQuery.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.conf.common.DataPath
21 | import com.abhioncbr.daflow.commons.conf.common.GeneralParamConf
22 | import com.abhioncbr.daflow.commons.conf.common.QueryConf
23 | import com.abhioncbr.daflow.commons.conf.common.QueryFilesConf
24 | import com.abhioncbr.daflow.job.conf.xml.NodeTags._
25 |
26 | object ParseQuery {
27 | def fromXML(node: scala.xml.NodeSeq): QueryConf = {
28 | val configurationFile: Option[DataPath] =
29 | ParseUtil.parseNode[DataPath](node \ CONFIGURATION_FILE, None, ParseDataPath.fromXML)
30 |
31 | val queryFile: Option[DataPath] = ParseUtil.parseNode[DataPath](node \ SQL_QUERY_FILE, None, ParseDataPath.fromXML)
32 |
33 | val queryArgs: Option[Array[GeneralParamConf]] =
34 | ParseUtil.parseNode[Array[GeneralParamConf]](node \ QUERY_PARAMS, None, ParseGeneralParams.fromXML)
35 |
36 | val queryFiles: QueryFilesConf = QueryFilesConf(configurationFile = configurationFile, queryFile = queryFile)
37 | val query: QueryConf = QueryConf(queryFile = queryFiles, queryArgs = queryArgs)
38 | query
39 | }
40 | }
41 |
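Both query files and the argument list are optional, so a query node carrying only params yields a QueryConf whose path fields stay None. A hand-built equivalent (values illustrative):

    val query = QueryConf(
      queryFile = QueryFilesConf(configurationFile = None, queryFile = None),
      queryArgs = Some(Array(GeneralParamConf(1, "date", "2019-01-01", ""))))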
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/ParseTransform.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package com.abhioncbr.daflow.job.conf.xml
18 |
19 | import com.abhioncbr.daflow.commons.conf.transform
20 | import com.abhioncbr.daflow.commons.conf.transform.TransformConf
21 | import com.abhioncbr.daflow.commons.conf.transform.TransformStepConf
22 | import com.abhioncbr.daflow.job.conf.xml.AttributeTags._
23 | import com.abhioncbr.daflow.job.conf.xml.NodeTags._
24 |
25 | object ParseTransform {
26 | def fromXML(node: scala.xml.NodeSeq): TransformConf = {
27 | val steps: List[TransformStepConf] =
28 | List[TransformStepConf]((node \ STEP).toList map { s => ParseTransformStep.fromXML(s) }: _*)
29 |
30 | transform.TransformConf(transformSteps = steps,
31 | validateTransformedData = ParseUtil.parseBoolean((node \ VALIDATE_TRANSFORMED_DATA).text))
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/ParseTransformRule.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.conf.transform
21 | import com.abhioncbr.daflow.commons.conf.transform.TransformRuleConf
22 | import com.abhioncbr.daflow.commons.conf.transform.TransformStepConf
23 | import com.abhioncbr.daflow.job.conf.xml.AttributeTags._
24 | import com.abhioncbr.daflow.job.conf.xml.NodeTags._
25 |
26 | object ParseTransformStep {
27 | def fromXML(node: scala.xml.NodeSeq): TransformStepConf = {
28 | val order = ParseUtil.parseInt((node \ AttributeTags.ORDER).text)
29 |
30 | val rules: Map[String, TransformRuleConf] = List[TransformRuleConf]((node \ RULE).toList map {
31 | s => ParseTransformRule.fromXML(s)
32 | }: _*).map(rule => (rule.ruleAttributesMap(GROUP), rule)).toMap
33 |
34 | transform.TransformStepConf(order = order, rules = rules)
35 | }
36 | }
37 |
38 | object ParseTransformRule {
39 | def fromXML(node: scala.xml.NodeSeq): TransformRuleConf = {
40 | val ruleType = (node \ TYPE).text
41 | val condition = (node \ CONDITION).text
42 | val ruleAttributesMap: Map[String, String] = node.head.attributes.map(meta => (meta.key, meta.value.toString)).toMap
43 | TransformRuleConf(ruleType = ruleType, condition = condition, ruleAttributesMap = ruleAttributesMap)
44 | }
45 | }
46 |
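ParseTransformStep keys each rule by its GROUP attribute; a rule without that attribute makes ruleAttributesMap(GROUP) throw a NoSuchElementException. A sketch of the grouping with hand-built rules (the literal "group" key stands in for the GROUP constant, whose actual value is defined elsewhere):

    val r1 = TransformRuleConf(ruleType = "filter", condition = "{col1} like 'my%'",
      ruleAttributesMap = Map("group" -> "g1"))
    val r2 = TransformRuleConf(ruleType = "select", condition = "", ruleAttributesMap = Map("group" -> "g2"))
    val rules = List(r1, r2).map(rule => (rule.ruleAttributesMap("group"), rule)).toMap
    // rules("g1") == r1; rules("g2") == r2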
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/main/scala/com/abhioncbr/daflow/job/conf/xml/ParseUtil.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.conf.common.DataPath
21 | import com.abhioncbr.daflow.commons.util.FileUtil
22 | import scala.util.Try
23 |
24 | object ParseUtil {
25 |   def parseNode[T](node: scala.xml.NodeSeq, defaultValue: Option[T], fun: scala.xml.NodeSeq => T): Option[T] =
26 |     if (node.nonEmpty) { Some(fun(node)) } else { defaultValue }
27 |
28 | def parseNodeText(node: scala.xml.NodeSeq): String = node.text
29 |
30 | def parseBoolean(node: scala.xml.NodeSeq): Boolean = parseBoolean(node.text)
31 | def parseBoolean(text: String): Boolean = Try(text.toBoolean).getOrElse(false)
32 |
33 | def parseInt(node: scala.xml.NodeSeq): Int = parseInt(node.text)
34 | def parseInt(text: String): Int = Try(text.toInt).getOrElse(-1)
35 |
36 | def parseFilePathString(node: scala.xml.NodeSeq): Either[DataPath, String] = parseFilePathString(node.text)
37 |   def parseFilePathString(text: String, fileNameSeparator: String = "."): Either[DataPath, String] =
38 |     FileUtil.getFilePathObject(text, fileNameSeparator)
39 | }
40 |
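Behaviour sketch for the helpers above; the fallbacks are the ones encoded in the Try defaults:

    import scala.xml.XML

    val conf = XML.loadString("<conf><order>3</order><flag>true</flag></conf>")
    ParseUtil.parseInt(conf \ "order")      // 3
    ParseUtil.parseInt("not-a-number")      // -1 (Try fallback)
    ParseUtil.parseBoolean(conf \ "flag")   // true
    ParseUtil.parseBoolean("")              // false (Try fallback)
    ParseUtil.parseNode[Int](conf \ "missing", Some(0), n => ParseUtil.parseInt(n))  // Some(0): default used
    ParseUtil.parseNode[Int](conf \ "order", None, n => ParseUtil.parseInt(n))       // Some(3)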
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/test/scala/com/abhioncbr/daflow/job/conf/xml/ParseDaFlowJobXmlSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | class ParseDaFlowJobXmlSpec extends XmlJobConfBase {
21 | "validateXml" should "return true when valid xml file is provided as input" in {
22 | val xsdFile = xsdFilePath
23 | val xmlFile = s"$daflowExampleDemoJobXmlPath/json_etl_example.xml"
24 | val parse: ParseDaFlowJobXml = new ParseDaFlowJobXml
25 | val output: Boolean = parse.validateXml(xsdFile, xmlFile)
26 |
27 | output should not equal None
28 | output should be (true)
29 | }
30 |
31 | "validateXml" should "return true when valid xml jdbc_template file is provided as input" in {
32 | val xsdFile = xsdFilePath
33 | val xmlFile = s"$daflowExampleJobXmlTemplatePath/extract_jdbc_import.xml"
34 | val parse: ParseDaFlowJobXml = new ParseDaFlowJobXml
35 | val output: Boolean = parse.validateXml(xsdFile, xmlFile)
36 |
37 | output should not equal None
38 | output should be (true)
39 | }
40 |
41 | "validateXml" should "return true when valid xml json_template file is provided as input" in {
42 | val xsdFile = xsdFilePath
43 | val xmlFile = s"$daflowExampleJobXmlTemplatePath/extract_json_import.xml"
44 | val parse: ParseDaFlowJobXml = new ParseDaFlowJobXml
45 | val output: Boolean = parse.validateXml(xsdFile, xmlFile)
46 |
47 | output should not equal None
48 | output should be (true)
49 | }
50 |
51 | "validateXml" should "return true when valid xml multiple_group_template file is provided as input" in {
52 | val xsdFile = xsdFilePath
53 | val xmlFile = s"$daflowExampleJobXmlTemplatePath/multiple_group_name.xml"
54 | val parse: ParseDaFlowJobXml = new ParseDaFlowJobXml
55 | val output: Boolean = parse.validateXml(xsdFile, xmlFile)
56 |
57 | output should not equal None
58 | output should be (true)
59 | }
60 |
61 | "validateXml" should "return true when valid xml multiple_transform_template file is provided as input" in {
62 | val xsdFile = xsdFilePath
63 | val xmlFile = s"$daflowExampleJobXmlTemplatePath/multiple_transform_rule.xml"
64 | val parse: ParseDaFlowJobXml = new ParseDaFlowJobXml
65 | val output: Boolean = parse.validateXml(xsdFile, xmlFile)
66 |
67 | output should not equal None
68 | output should be (true)
69 | }
70 |
71 | }
72 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/test/scala/com/abhioncbr/daflow/job/conf/xml/ParseFieldMappingSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.conf.common.FieldMappingConf
21 |
22 | class ParseFieldMappingSpec extends XmlJobConfBase {
23 |
24 | "ParseFieldMapping-fromXML" should "return FieldMapping object" in {
25 | val xmlContent: String = """"""
26 |     val fieldMappingObject: FieldMappingConf = ParseFieldMapping.fromXML(node(xmlContent))
27 |     fieldMappingObject should not equal None
28 |     fieldMappingObject.sourceFieldName should be ("source")
29 |     fieldMappingObject.targetFieldName should be ("target")
30 | }
31 |
32 | "ParseFieldMappings-fromXML" should "return FieldMapping object" in {
33 | val xmlContent: String =
34 | """
35 | |
36 | |
37 | |
38 | |""".stripMargin
39 |     val fieldMappingList: List[FieldMappingConf] = ParseFieldMappings.fromXML(node(xmlContent))
40 |     fieldMappingList should not equal None
41 |     fieldMappingList.length should be (3)
42 |     fieldMappingList.head.sourceFieldName should be ("source1")
43 |     fieldMappingList.head.targetFieldName should be ("target1")
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/test/scala/com/abhioncbr/daflow/job/conf/xml/ParseJobStaticParamSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.conf.JobStaticParamConf
21 |
22 | class ParseJobStaticParamSpec extends XmlJobConfBase {
23 |
24 | "ParseJobStaticParam-fromXML" should "return JobStaticParam object" in {
25 | val xmlContent: String = """"""
26 | val jobStaticParamObject: JobStaticParamConf = ParseJobStaticParam.fromXML(node(xmlContent))
27 | jobStaticParamObject should not equal None
28 | jobStaticParamObject.jobName should be ("Job1")
29 | jobStaticParamObject.processFrequency.toString should be ("ONCE")
30 | jobStaticParamObject.publishStats should be (false)
31 | }
32 |
33 | "ParseJobStaticParam-fromXML" should "return JobStaticParam object with otherParams also" in {
34 | val xmlContent: String =
35 | """
36 | |
37 | |
38 | |
39 | |""".stripMargin
40 | val jobStaticParamObject: JobStaticParamConf = ParseJobStaticParam.fromXML(node(xmlContent))
41 | jobStaticParamObject should not equal None
42 | jobStaticParamObject.jobName should be ("Job1")
43 | jobStaticParamObject.processFrequency.toString should be ("ONCE")
44 | jobStaticParamObject.publishStats should be (false)
45 | jobStaticParamObject.otherParams should not equal None
46 | jobStaticParamObject.otherParams.get should not equal None
47 | jobStaticParamObject.otherParams.get.length should be (2)
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/test/scala/com/abhioncbr/daflow/job/conf/xml/ParsePartitioningRuleSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.conf.load.PartitioningDataConf
21 |
22 | class ParsePartitioningRuleSpec extends XmlJobConfBase {
23 |
24 | "ParsePartitioningData" should "return PartitioningData object with all variable having values" in {
25 | val xmlContent = """
26 |
27 |
28 |
29 | """.stripMargin
30 | val partitioningDataObject: PartitioningDataConf = ParsePartitioningData.fromXML(node(xmlContent))
31 | partitioningDataObject should not equal None
32 | partitioningDataObject.coalesce should be (true)
33 | partitioningDataObject.overwrite should be (true)
34 | partitioningDataObject.coalesceCount should be (10)
35 | partitioningDataObject.partitionColumns should not be None
36 | partitioningDataObject.partitionColumns.length should be (1)
37 | partitioningDataObject.partitionColumns.head.paramName should be ("Date")
38 | partitioningDataObject.partitionColumns.head.paramValue should be ("date")
39 | }
40 |
41 | "ParsePartitioningData" should "return PartitioningData object with only provided variables" in {
42 | val xmlContent = """
43 |
44 |
45 |
46 | """.stripMargin
47 | val partitioningDataObject: PartitioningDataConf = ParsePartitioningData.fromXML(node(xmlContent))
48 | partitioningDataObject should not equal None
49 | partitioningDataObject.coalesce should be (false)
50 | partitioningDataObject.overwrite should be (true)
51 | partitioningDataObject.coalesceCount should be (-1)
52 | partitioningDataObject.partitionColumns should not be None
53 | partitioningDataObject.partitionColumns.length should be (1)
54 | partitioningDataObject.partitionColumns.head.paramName should be ("Date")
55 | partitioningDataObject.partitionColumns.head.paramValue should be ("date")
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/test/scala/com/abhioncbr/daflow/job/conf/xml/ParseTransformSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.conf.transform.TransformConf
21 |
22 | class ParseTransformSpec extends XmlJobConfBase {
23 |
24 | "ParseTransform" should "return TransformConf object with array of TransformStepsConf" in {
25 | val xmlContent = """
26 | {col1} like 'my%'
27 |
28 | """
29 |
30 | val transformConfObject: TransformConf = ParseTransform.fromXML(node(xmlContent))
31 | transformConfObject should not equal None
32 | transformConfObject.validateTransformedData should be (false)
33 | transformConfObject.transformSteps.size should be (1)
34 | transformConfObject.transformSteps.head.order should be (23)
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-xml/src/test/scala/com/abhioncbr/daflow/job/conf/xml/XmlJobConfBase.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.job.conf.xml
19 |
20 | import com.abhioncbr.daflow.commons.CommonSpec
21 | import scala.xml.XML
22 |
23 | class XmlJobConfBase extends CommonSpec {
24 |
25 | val userDirectory: String = System.getProperty("user.dir")
26 |
27 | val daflowExamplesPath = s"$userDirectory/daflow-examples"
28 | val daflowExamplesDemoPath = s"$daflowExamplesPath/demo"
29 | val daflowExamplesDemoSampleDataPath = s"$daflowExamplesDemoPath/sample-data"
30 | val daflowExampleDemoJobXmlPath = s"$daflowExamplesDemoPath/daflow-job-xml"
31 | val daflowExampleJobXmlTemplatePath = s"$daflowExamplesPath/daflow-xml-templates"
32 |
33 | val xsdFilePath = s"$userDirectory/daflow-job-conf/daflow-job-conf-xml/daflow-feed-job.xsd"
34 |
35 | val node: String => scala.xml.NodeSeq = (xmlContent: String) => { XML.loadString(xmlContent) }
36 |
37 | }
38 |
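The node helper above simply lifts an XML string into a NodeSeq for the specs, e.g. (feed XML illustrative):

    val ns: scala.xml.NodeSeq = node("""<feed name="f1"/>""")
    (ns \ "@name").text  // "f1"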
--------------------------------------------------------------------------------
/daflow-job-conf/daflow-job-conf-yaml/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | com.abhioncbr.daflow
7 | daflow
8 | ${revision}
9 | ../../pom.xml
10 |
11 | 4.0.0
12 |
13 | jar
14 | daflow-job-conf-yaml
15 | daflow-job-conf-yaml
16 | ${daflow.job.conf.yaml.version}
17 |
18 |
19 |
20 | Apache License, Version 2.0
21 | http://www.apache.org/licenses/LICENSE-2.0.txt
22 | repo
23 | A business-friendly OSS license
24 |
25 |
26 |
27 |
28 |
29 | scala-tools.org
30 | Scala-Tools Maven2 Repository
31 | http://scala-tools.org/repo-releases
32 |
33 |
34 |
35 |
36 |
37 | scala-tools.org
38 | Scala-Tools Maven2 Repository
39 | http://scala-tools.org/repo-releases
40 |
41 |
42 |
43 |
44 | 0.11.1
45 | 0.10.0
46 |
47 |
48 |
49 |
50 | com.abhioncbr.daflow
51 | daflow-commons
52 | ${daflow.common.version}
53 |
54 |
55 |
56 | io.circe
57 | circe-yaml_2.11
58 | ${circe-yaml.version}
59 |
60 |
61 |
62 | io.circe
63 | circe-generic_2.11
64 | ${circe-generic.version}
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 | org.scala-tools
75 | maven-scala-plugin
76 |
77 | ${scala.version}
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/daflow-metrics/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | com.abhioncbr.daflow
7 | daflow
8 | ${revision}
9 |
10 | 4.0.0
11 |
12 | jar
13 | daflow-metrics
14 | daflow-metrics
15 | ${daflow.metrics.version}
16 |
17 |
18 |
19 | Apache License, Version 2.0
20 | http://www.apache.org/licenses/LICENSE-2.0.txt
21 | repo
22 | A business-friendly OSS license
23 |
24 |
25 |
26 |
27 |
28 | scala-tools.org
29 | Scala-Tools Maven2 Repository
30 | http://scala-tools.org/repo-releases
31 |
32 |
33 |
34 |
35 |
36 | scala-tools.org
37 | Scala-Tools Maven2 Repository
38 | http://scala-tools.org/repo-releases
39 |
40 |
41 |
42 |
43 | 0.6.0
44 |
45 |
46 |
47 |
48 | com.abhioncbr.daflow
49 | daflow-commons
50 | ${daflow.core.version}
51 |
52 |
53 |
54 | io.prometheus
55 | simpleclient
56 | ${prometheusVersion}
57 |
58 |
59 |
60 | io.prometheus
61 | simpleclient_servlet
62 | ${prometheusVersion}
63 |
64 |
65 |
66 | io.prometheus
67 | simpleclient_pushgateway
68 | ${prometheusVersion}
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 | org.scala-tools
78 | maven-scala-plugin
79 |
80 | ${scala.version}
81 |
82 |
83 |
84 |
85 |
86 |
--------------------------------------------------------------------------------
/daflow-metrics/scripts/daflow-feed-stat.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | JOB_NAME=$1
4 | JOB_SUBTASK_NAME=$2
5 |
6 | STATUS=$3
7 | FREQUENCY=$4
8 | DATE=$5
9 | HOUR=$6
10 |
11 | V_PASSED_COUNT=$7
12 | V_FAILED_COUNT=$8
13 | EXECUTION_TIME=${9}
14 |
15 | T_PASSED_COUNT=${10}
16 | T_FAILED_COUNT=${11}
17 | FAILURE_REASON="${12}"
18 |
19 | echo "updating daflow feed stat table for job=$JOB_NAME, job_subtask=$JOB_SUBTASK_NAME, venture=$VENTURE, date=$DATE"
20 | QUERY="INSERT INTO TABLE daflow.daflow-feed-stat PARTITION (job_name = '$JOB_NAME') (job_subtask, status, frequency,
21 | data_date, data_hour, schema_validation_passed_data_count, schema_validation_failed_data_count, feed_execution_time, transformation_passed_data_count, transformation_failed_data_count, failure_reason) VALUES ('$JOB_SUBTASK_NAME', '$STATUS', '$FREQUENCY', '$DATE', '$HOUR', $V_PASSED_COUNT, $V_FAILED_COUNT, $EXECUTION_TIME, $T_PASSED_COUNT, $T_FAILED_COUNT, '$FAILURE_REASON');"
22 | echo "Going to execute query: $QUERY"
23 |
24 | hive -e "
25 | SET mapred.job.queue.name=pipelines;
26 | $QUERY
27 | "
28 |
29 | exit_code=$?
30 | exit ${exit_code}
--------------------------------------------------------------------------------
/daflow-metrics/sql/daflow-feed-stat:
--------------------------------------------------------------------------------
1 | CREATE EXTERNAL TABLE daflow_feed_stat (
2 | job_subtask string,
3 | status string,
4 | frequency string,
5 | data_date date,
6 | data_hour string,
7 | failure_reason string,
8 | transformation_passed_data_count bigint,
9 | transformation_failed_data_count bigint,
10 | schema_validation_passed_data_count bigint,
11 | schema_validation_failed_data_count bigint,
12 | feed_execution_time bigint)
13 | PARTITIONED BY (job_name string)
14 | STORED AS PARQUET
15 | LOCATION '/data/daflow_data/daflow_feed_stat';
--------------------------------------------------------------------------------
/daflow-metrics/src/main/scala/com/abhioncbr/daflow/metrics/promethus/PrometheusObject.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.metrics.promethus
19 |
20 | import com.abhioncbr.daflow.commons.NotificationMessages
21 | import com.typesafe.scalalogging.Logger
22 | import io.prometheus.client.CollectorRegistry
23 | import io.prometheus.client.Gauge
24 | import io.prometheus.client.exporter.PushGateway
25 | import scala.util.Failure
26 | import scala.util.Success
27 | import scala.util.Try
28 |
29 | class PrometheusObject(feedName: String, pushGatewayIpAddress: String) {
30 | private val logger = Logger(this.getClass)
31 |
32 |   @transient lazy val feedDataStatGauge: Gauge = Gauge.build()
33 |     .name(feedName).help(s"number of entries for a given $feedName").labelNames("feed").register() // label declared here is required by the .labels(feedName) call below
34 |
35 | def pushMetrics(metricsJobName: String, metricData: Long): Either[Unit, String] = {
36 | @transient val conf: Map[String, String] = Map()
37 | val pushGatewayAddress = conf.getOrElse("pushGatewayAddr", pushGatewayIpAddress)
38 | val pushGateway = new PushGateway(pushGatewayAddress)
39 |
40 | feedDataStatGauge.labels(feedName).set(metricData)
41 |
42 | val output: Either[Unit, String] = Try(pushGateway.push(CollectorRegistry.defaultRegistry, metricsJobName)) match {
43 | case Success(u: Unit) => Left(u)
44 | case Failure(ex: Exception) => val str = s"Unable to push metrics. ${NotificationMessages.exceptionMessage(ex)}"
45 | logger.warn(str)
46 | Right(str)
47 | }
48 | output
49 | }
50 |
51 | }
52 |
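A minimal wiring sketch, assuming a Pushgateway reachable at the given host:port (the address and metric names below are illustrative):

    val metrics = new PrometheusObject("daflow_feed_rows", "pushgateway.local:9091")
    metrics.pushMetrics(metricsJobName = "daflow_json_etl", metricData = 1024L) match {
      case Left(_)    => ()            // pushed successfully
      case Right(err) => println(err)  // push failed; err carries the reason, already logged as a warning
    }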
--------------------------------------------------------------------------------
/daflow-metrics/src/main/scala/com/abhioncbr/daflow/metrics/stats/JobResult.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.abhioncbr.daflow.metrics.stats
19 |
20 | case class JobResult(success: Boolean, feedName: String, transformationPassedCount: Long,
21 | transformationFailedCount: Long, validateCount: Long, nonValidatedCount: Long, failureReason: String)
22 |
--------------------------------------------------------------------------------
/daflow-sql-parser/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sparsecode/DaFlow/068ec7582ef58cbc6c782d2bc19a43aca2dead81/daflow-sql-parser/README.md
--------------------------------------------------------------------------------
/daflow-sql-parser/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | com.abhioncbr.daflow
7 | daflow
8 | ${revision}
9 |
10 | 4.0.0
11 |
12 | jar
13 | daflow-sql-parser
14 | daflow-sql-parser
15 | ${daflow.sql.parser.version}
16 |
17 |
18 |
19 | Apache License, Version 2.0
20 | http://www.apache.org/licenses/LICENSE-2.0.txt
21 | repo
22 | A business-friendly OSS license
23 |
24 |
25 |
26 |
27 |
28 | scala-tools.org
29 | Scala-Tools Maven2 Repository
30 | http://scala-tools.org/repo-releases
31 |
32 |
33 |
34 |
35 |
36 | scala-tools.org
37 | Scala-Tools Maven2 Repository
38 | http://scala-tools.org/repo-releases
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 | org.scala-tools
53 | maven-scala-plugin
54 |
55 | ${scala.version}
56 |
57 |
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/daflow-sql-parser/src/main/scala/com/abhioncbr/daflow/sqlParser/QueryDsl.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package com.abhioncbr.daflow.sqlParser
18 |
19 | abstract class Operation { def from(table: String): From = From(table, Option(this)) }
20 | case class Select(fields: String*) extends Operation
21 | case class From(table: String, operation: Option[Operation] = None) {
22 | def where(clauses: Clause*): Query = Query(from = this, operation = operation.get, Option(Where(clauses: _*)))
23 | }
24 |
25 | case class Query(from: From, operation: Operation, where: Option[Where], order: Option[Direction] = None) {
26 | def order(dir: Direction): Query = this.copy(order = Option(dir))
27 | }
28 |
29 | case class Where(clauses: Clause*)
30 |
31 | abstract class Clause {
32 | def hasField: Boolean
33 | def getFields: Map[String, Any]
34 | def and(otherField: Clause): Clause = And(this, otherField)
35 | def or(otherField: Clause): Clause = Or(this, otherField)
36 | }
37 | abstract class ValueClause(t: String, f: String, values: Any*) extends Clause {
38 | override def hasField: Boolean = true
39 | override def getFields: Map[String, Any] = Map("type" -> t, "field" -> f, "value" -> values)
40 | }
41 | abstract class ExpressionValueClause(t: String, f: String, expr: String, values: Any*) extends ValueClause(t, f,
42 | values) {
43 | override def hasField: Boolean = true
44 | override def getFields: Map[String, Any] = Map("type" -> t, "field" -> f, "value" -> values, "expr" -> expr)
45 | }
46 | abstract class ReferenceClause(t: String, lClause: Clause, rClause: Clause) extends Clause {
47 | override def hasField: Boolean = false
48 | override def getFields: Map[String, Any] = Map("type" -> t, "lClause" -> lClause, "rClause" -> rClause)
49 | }
50 |
51 | case class Null(f: String) extends ValueClause("Null", f)
52 | case class NotNull(f: String) extends ValueClause("NotNull", f)
53 | case class Like(f: String, value: Any) extends ValueClause("Like", f, value)
54 | case class In(f: String, values: String*) extends ValueClause("in", f, values)
55 | case class Between(f: String, values: Any*) extends ValueClause("Between", f, values)
56 | case class StringExpressions(f: String, expr: String, value: String)
57 |   extends ExpressionValueClause("stringEquals", f, expr, value)
58 | case class NumberExpressions(f: String, expr: String, value: Number)
59 |   extends ExpressionValueClause("numberEquals", f, expr, value)
60 | case class BooleanExpressions(f: String, expr: String, value: Boolean)
61 | extends ExpressionValueClause("booleanEquals", f, expr, value)
62 |
63 | case class And(lClause: Clause, rClause: Clause) extends ReferenceClause("and", lClause, rClause)
64 | case class Or(lClause: Clause, rClause: Clause) extends ReferenceClause("or", lClause, rClause)
65 |
66 | abstract class Direction
67 | case class Asc(field: String) extends Direction
68 | case class Desc(field: String) extends Direction
69 |
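A composition sketch of the DSL above; everything used here is defined in this file:

    val q: Query =
      Select("id", "name")
        .from("users")
        .where(NotNull("name").and(Like("name", "my%")))
        .order(Asc("id"))
    // q.where.get.clauses.head.getFields("type") == "and"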
--------------------------------------------------------------------------------
/docker/compose/hadoop.env:
--------------------------------------------------------------------------------
1 | HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://daflow-hive-metastore-postgresql/metastore
2 | HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
3 | HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
4 | HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
5 | HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
6 | HIVE_SITE_CONF_hive_metastore_uris=thrift://daflow-hivemetastore:9083
7 | HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
8 |
9 | HDFS_CONF_dfs_webhdfs_enabled=true
10 | HDFS_CONF_dfs_permissions_enabled=false
11 | #HDFS_CONF_dfs_client_use_datanode_hostname=true
12 | #HDFS_CONF_dfs_namenode_use_datanode_hostname=true
13 |
14 | CORE_CONF_fs_defaultFS=hdfs://daflow-namenode:8020
15 | CORE_CONF_hadoop_http_staticuser_user=root
16 | CORE_CONF_hadoop_proxyuser_hue_hosts=*
17 | CORE_CONF_hadoop_proxyuser_hue_groups=*
18 |
19 | YARN_CONF_yarn_log___aggregation___enable=true
20 | YARN_CONF_yarn_resourcemanager_recovery_enabled=true
21 | YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
22 | YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
23 | YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
24 | YARN_CONF_yarn_log_server_url=http://daflow-historyserver:8188/applicationhistory/logs/
25 | YARN_CONF_yarn_timeline___service_enabled=true
26 | YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
27 | YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
28 | YARN_CONF_yarn_resourcemanager_hostname=daflow-resourcemanager
29 | YARN_CONF_yarn_timeline___service_hostname=daflow-historyserver
30 | YARN_CONF_yarn_resourcemanager_address=daflow-resourcemanager:8032
31 | YARN_CONF_yarn_resourcemanager_scheduler_address=daflow-resourcemanager:8030
32 | YARN_CONF_yarn_resourcemanager_resource___tracker_address=daflow-resourcemanager:8031
33 | YARN_CONF_yarn_nodemanager_vmem___check___enabled=false
34 |
--------------------------------------------------------------------------------
/docker/images/hadoop/base/Dockerfile:
--------------------------------------------------------------------------------
1 | #COPIED FROM -> https://github.com/apache/incubator-hudi/blob/master/docker/hoodie/hadoop/base/Dockerfile
2 | #Also idea from, https://github.com/big-data-europe/docker-hadoop
3 |
4 | FROM openjdk:8u212-jdk-slim-stretch
5 | MAINTAINER DaFlow
6 | USER root
7 |
8 | # Default to UTF-8 file.encoding
9 | ENV LANG C.UTF-8
10 |
11 | ARG HADOOP_VERSION=2.8.4
12 | ARG HADOOP_URL=https://www.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
13 | ENV HADOOP_VERSION ${HADOOP_VERSION}
14 | ENV HADOOP_URL ${HADOOP_URL}
15 |
16 | RUN set -x \
17 | && DEBIAN_FRONTEND=noninteractive apt-get -yq update && apt-get -yq install curl wget netcat procps \
18 | && echo "Fetch URL2 is : ${HADOOP_URL}" \
19 | && curl -fSL "${HADOOP_URL}" -o /tmp/hadoop.tar.gz \
20 | && curl -fSL "${HADOOP_URL}.asc" -o /tmp/hadoop.tar.gz.asc \
21 | && mkdir -p /opt/hadoop-$HADOOP_VERSION/logs \
22 | && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
23 | && rm /tmp/hadoop.tar.gz* \
24 | && ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop \
25 | && cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml \
26 | && mkdir /hadoop-data
27 |
28 | ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION
29 | ENV HADOOP_CONF_DIR=/etc/hadoop
30 | ENV MULTIHOMED_NETWORK=1
31 | ENV HADOOP_HOME=${HADOOP_PREFIX}
32 | ENV HADOOP_INSTALL=${HADOOP_HOME}
33 | ENV USER=root
34 | ENV PATH /usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH
35 |
36 | # Exposing a union of ports across hadoop versions
37 | # Well known ports including ssh
38 | EXPOSE 0-1024 4040 7000-10100 5000-5100 50000-50200 58188 58088 58042
39 |
40 | ADD entrypoint.sh /entrypoint.sh
41 | ADD export_container_ip.sh /usr/bin/
42 | RUN chmod a+x /usr/bin/export_container_ip.sh \
43 | && chmod a+x /entrypoint.sh
44 |
45 | ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]
--------------------------------------------------------------------------------
/docker/images/hadoop/base/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #######################################################################################
4 | ## COPIED FROM ##
5 | ## https://github.com/big-data-europe/docker-hadoop/blob/master/base/entrypoint.sh ##
6 | ## ##
7 | #######################################################################################
8 |
9 | # Set some sensible defaults
10 | export CORE_CONF_fs_defaultFS=${CORE_CONF_fs_defaultFS:-hdfs://`hostname -f`:8020}
11 |
12 | function addProperty() {
13 | local path=$1
14 | local name=$2
15 | local value=$3
16 |
17 |     local entry="<property><name>$name</name><value>${value}</value></property>"
18 | local escapedEntry=$(echo $entry | sed 's/\//\\\//g')
19 | sed -i "/<\/configuration>/ s/.*/${escapedEntry}\n&/" $path
20 | }
21 |
22 | function configure() {
23 | local path=$1
24 | local module=$2
25 | local envPrefix=$3
26 |
27 | local var
28 | local value
29 |
30 | echo "Configuring $module"
31 | for c in `printenv | perl -sne 'print "$1 " if m/^${envPrefix}_(.+?)=.*/' -- -envPrefix=$envPrefix`; do
32 | name=`echo ${c} | perl -pe 's/___/-/g; s/__/@/g; s/_/./g; s/@/_/g;'`
33 | var="${envPrefix}_${c}"
34 | value=${!var}
35 | echo " - Setting $name=$value"
36 | addProperty /etc/hadoop/$module-site.xml $name "$value"
37 | done
38 | }
39 |
40 | configure /etc/hadoop/core-site.xml core CORE_CONF
41 | configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF
42 | configure /etc/hadoop/yarn-site.xml yarn YARN_CONF
43 | configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF
44 | configure /etc/hadoop/kms-site.xml kms KMS_CONF
45 |
46 | if [ "$MULTIHOMED_NETWORK" = "1" ]; then
47 | echo "Configuring for multihomed network"
48 |
49 | # HDFS
50 | addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.rpc-bind-host 0.0.0.0
51 | addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.servicerpc-bind-host 0.0.0.0
52 | addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.http-bind-host 0.0.0.0
53 | addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.https-bind-host 0.0.0.0
54 | addProperty /etc/hadoop/hdfs-site.xml dfs.client.use.datanode.hostname true
55 | addProperty /etc/hadoop/hdfs-site.xml dfs.datanode.use.datanode.hostname true
56 |
57 | # YARN
58 | addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host 0.0.0.0
59 | addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
61 | addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host 0.0.0.0
62 |
63 | # MAPRED
64 | addProperty /etc/hadoop/mapred-site.xml yarn.nodemanager.bind-host 0.0.0.0
65 | fi
66 |
67 | if [ -n "$GANGLIA_HOST" ]; then
68 | mv /etc/hadoop/hadoop-metrics.properties /etc/hadoop/hadoop-metrics.properties.orig
69 | mv /etc/hadoop/hadoop-metrics2.properties /etc/hadoop/hadoop-metrics2.properties.orig
70 |
71 | for module in mapred jvm rpc ugi; do
72 | echo "$module.class=org.apache.hadoop.metrics.ganglia.GangliaContext31"
73 | echo "$module.period=10"
74 | echo "$module.servers=$GANGLIA_HOST:8649"
75 | done > /etc/hadoop/hadoop-metrics.properties
76 |
77 | for module in namenode datanode resourcemanager nodemanager mrappmaster jobhistoryserver; do
78 | echo "$module.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31"
79 | echo "$module.sink.ganglia.period=10"
80 | echo "$module.sink.ganglia.supportsparse=true"
81 | echo "$module.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both"
82 | echo "$module.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40"
83 | echo "$module.sink.ganglia.servers=$GANGLIA_HOST:8649"
84 | done > /etc/hadoop/hadoop-metrics2.properties
85 | fi
86 |
87 | # Save Container IP in ENV variable (sourced, so MY_CONTAINER_IP is visible to this shell)
88 | . /usr/bin/export_container_ip.sh
89 |
90 | exec "$@"
91 |
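The perl one-liner in configure() mangles env-var suffixes into Hadoop property names: ___ becomes -, __ becomes _ (via a temporary @), and a single _ becomes a dot. An equivalent Scala transliteration of the same rule:

    def toPropertyName(envSuffix: String): String =
      envSuffix.replace("___", "-").replace("__", "@").replace("_", ".").replace("@", "_")

    toPropertyName("dfs_namenode_rpc___bind___host")  // "dfs.namenode.rpc-bind-host"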
--------------------------------------------------------------------------------
/docker/images/hadoop/base/export_container_ip.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #############################################################################################################
4 | ## COPIED FROM ##
5 | ## https://github.com/apache/incubator-hudi/blob/master/docker/hoodie/hadoop/base/export_container_ip.sh ##
6 | ## ##
7 | #############################################################################################################
8 |
9 | interfaces=( "en0" "eth0" )
10 |
11 | ipAddr=""
12 | for interface in "${interfaces[@]}"
13 | do
14 |     ipAddr=`ifconfig ${interface} | grep -Eo 'inet (addr:)?([0-9]+\.){3}[0-9]+' | grep -Eo '([0-9]+\.){3}[0-9]+' | grep -v '127.0.0.1' | head -n 1`
15 | if [[ -n "$ipAddr" ]]; then
16 | break
17 | fi
18 | done
19 |
20 | echo "Container IP is set to : $ipAddr"
21 | export MY_CONTAINER_IP=${ipAddr}
22 |
--------------------------------------------------------------------------------
/docker/images/hadoop/base/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | com.abhioncbr.daflow
4 | daflow-docker
5 | ${revision}
6 | ../../pom.xml
7 |
8 |
9 | pom
10 | ${daflow.docker.hadoop.base.version}
11 | 4.0.0
12 | daflow-hadoop-base-docker
13 | DaFlow's Docker Image of Hadoop Base
14 |
15 |
16 | UTF-8
17 |
18 |
19 |
20 |
21 | com.abhioncbr.daflow
22 | daflow-docker
23 | ${project.version}
24 | pom
25 | import
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 | com.spotify
35 | dockerfile-maven-plugin
36 | ${dockerfile.maven.version}
37 |
38 |
39 | tag-latest
40 | pre-integration-test
41 |
42 | build
43 | tag
44 | push
45 |
46 |
47 | latest
48 | ${docker.build.skip}
49 | false
50 | ${docker.reg.username}
51 | ${docker.reg.password}
52 | abhioncbr/daflow-hadoop-base
53 |
54 | ${docker.hadoop.version}
55 |
56 |
57 |
58 |
59 | tag-version
60 | pre-integration-test
61 |
62 | build
63 | tag
64 | push
65 |
66 |
67 | ${docker.hadoop.version}
68 | ${docker.build.skip}
69 | false
70 | ${docker.reg.username}
71 | ${docker.reg.password}
72 | abhioncbr/daflow-hadoop-base
73 |
74 | ${docker.hadoop.version}
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/docker/images/hadoop/datanode/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG HADOOP_VERSION=2.8.4
2 | ARG HADOOP_DN_PORT=50075
3 | FROM abhioncbr/daflow-hadoop-base:${HADOOP_VERSION}
4 | ARG HADOOP_DN_PORT
5 | ENV HADOOP_DN_PORT ${HADOOP_DN_PORT}
6 |
7 | ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
8 | RUN mkdir -p /hadoop/dfs/data
9 | VOLUME /hadoop/dfs/data
10 |
11 | ADD run_dn.sh /run_dn.sh
12 | RUN chmod a+x /run_dn.sh
13 |
14 | CMD ["/run_dn.sh"]
--------------------------------------------------------------------------------
/docker/images/hadoop/datanode/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | com.abhioncbr.daflow
4 | daflow-docker
5 | ${revision}
6 | ../../pom.xml
7 |
8 |
9 | pom
10 | ${daflow.docker.hadoop.datanode.version}
11 | 4.0.0
12 | daflow-hadoop-datanode-docker
13 | DaFlow's Docker Image of Hadoop Data Node.
14 |
15 |
16 | UTF-8
17 |
18 |
19 |
20 |
21 | com.abhioncbr.daflow
22 | daflow-hadoop-base-docker
23 | ${project.version}
24 | pom
25 | import
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | com.spotify
34 | dockerfile-maven-plugin
35 | ${dockerfile.maven.version}
36 |
37 |
38 | tag-latest
39 | pre-integration-test
40 |
41 | build
42 | tag
43 | push
44 |
45 |
46 | latest
47 | ${docker.build.skip}
48 | false
49 | ${docker.reg.username}
50 | ${docker.reg.password}
51 | abhioncbr/daflow-hadoop-datanode
52 |
53 | ${docker.hadoop.version}
54 | ${docker.hadoop.dn.port}
55 |
56 |
57 |
58 |
59 | tag-version
60 | pre-integration-test
61 |
62 | build
63 | tag
64 | push
65 |
66 |
67 | ${docker.hadoop.version}-${project.version}
68 | ${docker.build.skip}
69 | false
70 | ${docker.reg.username}
71 | ${docker.reg.password}
72 | abhioncbr/daflow-hadoop-datanode
73 |
74 | ${docker.hadoop.version}
75 | ${docker.hadoop.dn.port}
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/docker/images/hadoop/datanode/run_dn.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | datadir=`echo $HDFS_CONF_dfs_datanode_data_dir | perl -pe 's#file://##'`
4 | if [ ! -d $datadir ]; then
5 | echo "Datanode data directory not found: $datadir"
6 | exit 2
7 | fi
8 |
9 | $HADOOP_PREFIX/bin/hdfs --config $HADOOP_CONF_DIR datanode
--------------------------------------------------------------------------------
/docker/images/hadoop/historyserver/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG HADOOP_VERSION=2.8.4
2 | ARG HADOOP_HISTORY_PORT=8188
3 | FROM abhioncbr/daflow-hadoop-base:${HADOOP_VERSION}
4 | ARG HADOOP_HISTORY_PORT
5 | ENV HADOOP_HISTORY_PORT ${HADOOP_HISTORY_PORT}
6 |
7 | ENV YARN_CONF_yarn_timeline___service_leveldb___timeline___store_path=/hadoop/yarn/timeline
8 | RUN mkdir -p /hadoop/yarn/timeline
9 | VOLUME /hadoop/yarn/timeline
10 |
11 | ADD run_history.sh /run_history.sh
12 | RUN chmod a+x /run_history.sh
13 |
14 | CMD ["/run_history.sh"]
--------------------------------------------------------------------------------
/docker/images/hadoop/historyserver/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | com.abhioncbr.daflow
4 | daflow-docker
5 | ${revision}
6 | ../../pom.xml
7 |
8 |
9 | pom
10 | ${daflow.docker.hadoop.historyserver.version}
11 | 4.0.0
12 | daflow-hadoop-historyserver-docker
13 | DaFlow's Docker Image of Hadoop History Server.
14 |
15 |
16 | UTF-8
17 |
18 |
19 |
20 |
21 | com.abhioncbr.daflow
22 | daflow-hadoop-base-docker
23 | ${project.version}
24 | pom
25 | import
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | com.spotify
34 | dockerfile-maven-plugin
35 | ${dockerfile.maven.version}
36 |
37 |
38 | tag-latest
39 | pre-integration-test
40 |
41 | build
42 | tag
43 | push
44 |
45 |
46 | latest
47 | ${docker.build.skip}
48 | false
49 | ${docker.reg.username}
50 | ${docker.reg.password}
51 | abhioncbr/daflow-hadoop-historyserver
52 |
53 | ${docker.hadoop.version}
54 | ${docker.hadoop.hs.port}
55 |
56 |
57 |
58 |
59 | tag-version
60 | pre-integration-test
61 |
62 | build
63 | tag
64 | push
65 |
66 |
67 | ${docker.hadoop.version}-${project.version}
68 | ${docker.build.skip}
69 | false
70 | ${docker.reg.username}
71 | ${docker.reg.password}
72 | abhioncbr/daflow-hadoop-historyserver
73 |
74 | ${docker.hadoop.version}
75 | ${docker.hadoop.hs.port}
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/docker/images/hadoop/historyserver/run_history.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | $HADOOP_PREFIX/bin/yarn --config $HADOOP_CONF_DIR historyserver
--------------------------------------------------------------------------------
/docker/images/hadoop/namenode/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG HADOOP_VERSION=2.8.4
2 | ARG HADOOP_WEBHDFS_PORT=50070
3 | FROM abhioncbr/daflow-hadoop-base:${HADOOP_VERSION}
4 | ARG HADOOP_WEBHDFS_PORT
5 | ENV HADOOP_WEBHDFS_PORT ${HADOOP_WEBHDFS_PORT}
6 |
7 | ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
8 | RUN mkdir -p /hadoop/dfs/name
9 | VOLUME /hadoop/dfs/name
10 |
11 | ADD run_nn.sh /run_nn.sh
12 | RUN chmod a+x /run_nn.sh
13 |
14 | CMD ["/run_nn.sh"]
--------------------------------------------------------------------------------
/docker/images/hadoop/namenode/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <parent>
6 |     <groupId>com.abhioncbr.daflow</groupId>
7 |     <artifactId>daflow-docker</artifactId>
8 |     <version>${revision}</version>
9 |     <relativePath>../../pom.xml</relativePath>
10 |   </parent>
11 |
12 |   <packaging>pom</packaging>
13 |   <version>${daflow.docker.hadoop.namenode.version}</version>
14 |   <modelVersion>4.0.0</modelVersion>
15 |   <artifactId>daflow-hadoop-namenode-docker</artifactId>
16 |   <description>DaFlow's Docker Image of Hadoop Name Node.</description>
17 |
18 |   <properties>
19 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
20 |   </properties>
21 |
22 |   <dependencyManagement>
23 |     <dependencies>
24 |       <dependency>
25 |         <groupId>com.abhioncbr.daflow</groupId>
26 |         <artifactId>daflow-hadoop-base-docker</artifactId>
27 |         <version>${project.version}</version>
28 |         <type>pom</type>
29 |         <scope>import</scope>
30 |       </dependency>
31 |     </dependencies>
32 |   </dependencyManagement>
33 |
34 |   <build>
35 |     <plugins>
36 |       <plugin>
37 |         <groupId>com.spotify</groupId>
38 |         <artifactId>dockerfile-maven-plugin</artifactId>
39 |         <version>${dockerfile.maven.version}</version>
40 |         <executions>
41 |           <execution>
42 |             <id>tag-latest</id>
43 |             <phase>pre-integration-test</phase>
44 |             <goals>
45 |               <goal>build</goal>
46 |               <goal>tag</goal>
47 |               <goal>push</goal>
48 |             </goals>
49 |             <configuration>
50 |               <tag>latest</tag>
51 |               <skip>${docker.build.skip}</skip>
52 |               <useMavenSettingsForAuth>false</useMavenSettingsForAuth> <!-- element name inferred -->
53 |               <username>${docker.reg.username}</username>
54 |               <password>${docker.reg.password}</password>
55 |               <repository>abhioncbr/daflow-hadoop-namenode</repository>
56 |               <buildArgs>
57 |                 <HADOOP_VERSION>${docker.hadoop.version}</HADOOP_VERSION>
58 |                 <HADOOP_WEBHDFS_PORT>${docker.hadoop.webHdfs.port}</HADOOP_WEBHDFS_PORT>
59 |               </buildArgs>
60 |             </configuration>
61 |           </execution>
62 |           <execution>
63 |             <id>tag-version</id>
64 |             <phase>pre-integration-test</phase>
65 |             <goals>
66 |               <goal>build</goal>
67 |               <goal>tag</goal>
68 |               <goal>push</goal>
69 |             </goals>
70 |             <configuration>
71 |               <tag>${docker.hadoop.version}-${project.version}</tag>
72 |               <skip>${docker.build.skip}</skip>
73 |               <useMavenSettingsForAuth>false</useMavenSettingsForAuth>
74 |               <username>${docker.reg.username}</username>
75 |               <password>${docker.reg.password}</password>
76 |               <repository>abhioncbr/daflow-hadoop-namenode</repository>
77 |               <buildArgs>
78 |                 <HADOOP_VERSION>${docker.hadoop.version}</HADOOP_VERSION>
79 |                 <HADOOP_WEBHDFS_PORT>${docker.hadoop.webHdfs.port}</HADOOP_WEBHDFS_PORT>
80 |               </buildArgs>
81 |             </configuration>
82 |           </execution>
83 |         </executions>
84 |       </plugin>
85 |     </plugins>
86 |   </build>
87 | </project>
--------------------------------------------------------------------------------
/docker/images/hadoop/namenode/run_nn.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Strip the file:// scheme from the configured path, e.g. file:///hadoop/dfs/name -> /hadoop/dfs/name
4 | namedir=$(echo "$HDFS_CONF_dfs_namenode_name_dir" | perl -pe 's#file://##')
5 | if [ ! -d "$namedir" ]; then
6 |   echo "Namenode name directory not found: $namedir"
7 |   exit 2
8 | fi
9 |
10 | if [ -z "$CLUSTER_NAME" ]; then
11 |   echo "Cluster name not specified"
12 |   exit 2
13 | fi
14 |
15 | # Format HDFS only on the first start, i.e. while the name directory is still empty
16 | if [ -z "$(ls -A "$namedir")" ]; then
17 |   echo "Formatting namenode name directory: $namedir"
18 |   $HADOOP_PREFIX/bin/hdfs --config "$HADOOP_CONF_DIR" namenode -format "$CLUSTER_NAME"
19 | fi
20 |
21 | $HADOOP_PREFIX/bin/hdfs --config "$HADOOP_CONF_DIR" namenode
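22 |
23 | # CLUSTER_NAME has no default; the container must be started with it set, e.g. (assumed):
24 | #   docker run -e CLUSTER_NAME=daflow abhioncbr/daflow-hadoop-namenode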
--------------------------------------------------------------------------------
/docker/images/hive/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG HADOOP_VERSION=2.8.4
2 | FROM abhioncbr/daflow-hadoop-base:${HADOOP_VERSION}
3 |
4 | # Re-declare the ARG: values declared before FROM are not visible after it
5 | ARG HADOOP_VERSION
6 |
7 | ENV HIVE_HOME /opt/hive
8 | ENV PATH $HIVE_HOME/bin:$PATH
9 | ENV HADOOP_HOME /opt/hadoop-$HADOOP_VERSION
10 |
11 | WORKDIR /opt
12 |
13 | ARG HIVE_VERSION=2.3.3
14 | ARG HIVE_URL=https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz
15 | ENV HIVE_VERSION ${HIVE_VERSION}
16 | ENV HIVE_URL ${HIVE_URL}
17 |
18 | # Install Hive plus the MySQL and PostgreSQL JDBC drivers
19 | RUN echo "Hive URL is :${HIVE_URL}" && wget ${HIVE_URL} -O hive.tar.gz && \
20 |     tar -xzvf hive.tar.gz && mv *hive*-bin hive && \
21 |     ln -s /usr/share/java/mysql-connector-java.jar $HIVE_HOME/lib/mysql-connector-java.jar && \
22 |     wget https://jdbc.postgresql.org/download/postgresql-9.4.1212.jar -O $HIVE_HOME/lib/postgresql-jdbc.jar && \
23 |     rm hive.tar.gz && mkdir -p /var/daflow/ws/docker/daflow/hadoop/hive_base/target/
24 |
25 | # Spark must be built with Hive support to use Hive from Spark;
26 | # hive-site.xml must be copied to the $SPARK_HOME/conf folder.
27 |
28 | # Custom configuration goes here
29 | ADD conf/hive-site.xml $HADOOP_CONF_DIR
30 | ADD conf/beeline-log4j2.properties $HIVE_HOME/conf
31 | ADD conf/hive-env.sh $HIVE_HOME/conf
32 | ADD conf/hive-exec-log4j2.properties $HIVE_HOME/conf
33 | ADD conf/hive-log4j2.properties $HIVE_HOME/conf
34 | ADD conf/ivysettings.xml $HIVE_HOME/conf
35 | ADD conf/llap-daemon-log4j2.properties $HIVE_HOME/conf
36 |
37 | # Setup DaFlow library jars
38 | ADD target/demo /var/daflow/ws/daflow-examples/demo
39 |
40 | ENV DAFLOW_BUNDLE=/var/daflow/demo/artifacts/daflow-*.jar
41 |
42 | COPY startup.sh /usr/local/bin/
43 | RUN chmod +x /usr/local/bin/startup.sh
44 |
45 | COPY entrypoint.sh /usr/local/bin/
46 | RUN chmod +x /usr/local/bin/entrypoint.sh
47 |
48 | ENTRYPOINT ["entrypoint.sh"]
49 | CMD startup.sh
--------------------------------------------------------------------------------
/docker/images/hive/conf/beeline-log4j2.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | status = INFO
18 | name = BeelineLog4j2
19 | packages = org.apache.hadoop.hive.ql.log
20 |
21 | # list of properties
22 | property.hive.log.level = WARN
23 | property.hive.root.logger = console
24 |
25 | # list of all appenders
26 | appenders = console
27 |
28 | # console appender
29 | appender.console.type = Console
30 | appender.console.name = console
31 | appender.console.target = SYSTEM_ERR
32 | appender.console.layout.type = PatternLayout
33 | appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n
34 |
35 | # list of all loggers
36 | loggers = HiveConnection
37 |
38 | # HiveConnection logs useful info for dynamic service discovery
39 | logger.HiveConnection.name = org.apache.hive.jdbc.HiveConnection
40 | logger.HiveConnection.level = INFO
41 |
42 | # root logger
43 | rootLogger.level = ${sys:hive.log.level}
44 | rootLogger.appenderRefs = root
45 | rootLogger.appenderRef.root.ref = ${sys:hive.root.logger}
46 |
--------------------------------------------------------------------------------
/docker/images/hive/conf/hive-env.sh:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Set Hive and Hadoop environment variables here. These variables can be used
18 | # to control the execution of Hive. It should be used by admins to configure
19 | # the Hive installation (so that users do not have to set environment variables
20 | # or set command line parameters to get correct behavior).
21 | #
22 | # The hive service being invoked (CLI/HWI etc.) is available via the environment
23 | # variable SERVICE
24 |
25 |
26 | # Hive Client memory usage can be an issue if a large number of clients
27 | # are running at the same time. The flags below have been useful in
28 | # reducing memory usage:
29 | #
30 | # if [ "$SERVICE" = "cli" ]; then
31 | # if [ -z "$DEBUG" ]; then
32 | # export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
33 | # else
34 | # export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit"
35 | # fi
36 | # fi
37 |
38 | # The heap size of the JVM started by the Hive shell script can be controlled via:
39 | #
40 | # export HADOOP_HEAPSIZE=1024
41 | #
42 | # A larger heap size may be required when running queries over a large number of files or partitions.
43 | # By default, Hive shell scripts use a heap size of 256 (MB). A larger heap size would also be
44 | # appropriate for the Hive server (hwi etc).
45 |
46 |
47 | # Set HADOOP_HOME to point to a specific hadoop install directory
48 | # HADOOP_HOME=${bin}/../../hadoop
49 |
50 | # Hive Configuration Directory can be controlled by:
51 | # export HIVE_CONF_DIR=
52 |
53 | # Folder containing extra libraries required for Hive compilation/execution can be controlled by:
54 | # export HIVE_AUX_JARS_PATH=
55 |
--------------------------------------------------------------------------------
/docker/images/hive/conf/hive-exec-log4j2.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | status = INFO
18 | name = HiveExecLog4j2
19 | packages = org.apache.hadoop.hive.ql.log
20 |
21 | # list of properties
22 | property.hive.log.level = INFO
23 | property.hive.root.logger = FA
24 | property.hive.query.id = hadoop
25 | property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name}
26 | property.hive.log.file = ${sys:hive.query.id}.log
27 |
28 | # list of all appenders
29 | appenders = console, FA
30 |
31 | # console appender
32 | appender.console.type = Console
33 | appender.console.name = console
34 | appender.console.target = SYSTEM_ERR
35 | appender.console.layout.type = PatternLayout
36 | appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n
37 |
38 | # simple file appender
39 | appender.FA.type = File
40 | appender.FA.name = FA
41 | appender.FA.fileName = ${sys:hive.log.dir}/${sys:hive.log.file}
42 | appender.FA.layout.type = PatternLayout
43 | appender.FA.layout.pattern = %d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n
44 |
45 | # list of all loggers
46 | loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX
47 |
48 | logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn
49 | logger.NIOServerCnxn.level = WARN
50 |
51 | logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO
52 | logger.ClientCnxnSocketNIO.level = WARN
53 |
54 | logger.DataNucleus.name = DataNucleus
55 | logger.DataNucleus.level = ERROR
56 |
57 | logger.Datastore.name = Datastore
58 | logger.Datastore.level = ERROR
59 |
60 | logger.JPOX.name = JPOX
61 | logger.JPOX.level = ERROR
62 |
63 | # root logger
64 | rootLogger.level = ${sys:hive.log.level}
65 | rootLogger.appenderRefs = root
66 | rootLogger.appenderRef.root.ref = ${sys:hive.root.logger}
67 |
--------------------------------------------------------------------------------
/docker/images/hive/conf/hive-log4j2.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | status = INFO
18 | name = HiveLog4j2
19 | packages = org.apache.hadoop.hive.ql.log
20 |
21 | # list of properties
22 | property.hive.log.level = INFO
23 | property.hive.root.logger = DRFA
24 | property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name}
25 | property.hive.log.file = hive.log
26 |
27 | # list of all appenders
28 | appenders = console, DRFA
29 |
30 | # console appender
31 | appender.console.type = Console
32 | appender.console.name = console
33 | appender.console.target = SYSTEM_ERR
34 | appender.console.layout.type = PatternLayout
35 | appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n
36 |
37 | # daily rolling file appender
38 | appender.DRFA.type = RollingFile
39 | appender.DRFA.name = DRFA
40 | appender.DRFA.fileName = ${sys:hive.log.dir}/${sys:hive.log.file}
41 | # Use %pid in the filePattern to append <process-id>@<host-name> to the filename if you want separate log files for different CLI sessions
42 | appender.DRFA.filePattern = ${sys:hive.log.dir}/${sys:hive.log.file}.%d{yyyy-MM-dd}
43 | appender.DRFA.layout.type = PatternLayout
44 | appender.DRFA.layout.pattern = %d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n
45 | appender.DRFA.policies.type = Policies
46 | appender.DRFA.policies.time.type = TimeBasedTriggeringPolicy
47 | appender.DRFA.policies.time.interval = 1
48 | appender.DRFA.policies.time.modulate = true
49 | appender.DRFA.strategy.type = DefaultRolloverStrategy
50 | appender.DRFA.strategy.max = 30
51 |
52 | # list of all loggers
53 | loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX
54 |
55 | logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn
56 | logger.NIOServerCnxn.level = WARN
57 |
58 | logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO
59 | logger.ClientCnxnSocketNIO.level = WARN
60 |
61 | logger.DataNucleus.name = DataNucleus
62 | logger.DataNucleus.level = ERROR
63 |
64 | logger.Datastore.name = Datastore
65 | logger.Datastore.level = ERROR
66 |
67 | logger.JPOX.name = JPOX
68 | logger.JPOX.level = ERROR
69 |
70 | # root logger
71 | rootLogger.level = ${sys:hive.log.level}
72 | rootLogger.appenderRefs = root
73 | rootLogger.appenderRef.root.ref = ${sys:hive.root.logger}
74 |
--------------------------------------------------------------------------------
/docker/images/hive/conf/hive-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3 | <!--
4 |   Licensed to the Apache Software Foundation (ASF) under one
5 |   or more contributor license agreements.  See the NOTICE file
6 |   distributed with this work for additional information
7 |   regarding copyright ownership.  The ASF licenses this file
8 |   to you under the Apache License, Version 2.0 (the
9 |   "License"); you may not use this file except in compliance
10 |   with the License.  You may obtain a copy of the License at
11 |
12 |       http://www.apache.org/licenses/LICENSE-2.0
13 |
14 |   Unless required by applicable law or agreed to in writing, software
15 |   distributed under the License is distributed on an "AS IS" BASIS,
16 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 |   See the License for the specific language governing permissions and
18 |   limitations under the License.
19 | -->
20 | <configuration>
21 | </configuration>
--------------------------------------------------------------------------------
/docker/images/hive/conf/ivysettings.xml:
--------------------------------------------------------------------------------
1 |
17 |
18 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/docker/images/hive/startup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Create the HDFS scratch and warehouse directories (-p makes this idempotent)
4 | hadoop fs -mkdir -p /tmp
5 | hadoop fs -mkdir -p /user/hive/warehouse
6 | hadoop fs -chmod g+w /tmp
7 | hadoop fs -chmod g+w /user/hive/warehouse
8 |
9 | cd ${HIVE_HOME}/bin
10 | export AUX_CLASSPATH=file://${DAFLOW_BUNDLE}
11 | ./hiveserver2 --hiveconf hive.server2.enable.doAs=false --hiveconf hive.aux.jars.path=file://${DAFLOW_BUNDLE}
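12 |
13 | # Once HiveServer2 is up, clients can connect on its default port, e.g. (assumed host):
14 | #   beeline -u jdbc:hive2://localhost:10000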
--------------------------------------------------------------------------------
/docker/images/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <parent>
6 |     <groupId>com.abhioncbr.daflow</groupId>
7 |     <artifactId>daflow</artifactId>
8 |     <version>${revision}</version>
9 |     <relativePath>../../pom.xml</relativePath>
10 |   </parent>
11 |
12 |   <packaging>pom</packaging>
13 |   <version>${revision}</version>
14 |   <modelVersion>4.0.0</modelVersion>
15 |   <artifactId>daflow-docker</artifactId>
16 |
17 |   <modules>
18 |     <module>hive</module>
19 |     <module>spark/base</module>
20 |     <module>hadoop/base</module>
21 |     <module>spark/adhoc</module>
22 |     <module>spark/worker</module>
23 |     <module>spark/master</module>
24 |     <module>hadoop/datanode</module>
25 |     <module>hadoop/namenode</module>
26 |     <module>hadoop/historyserver</module>
27 |   </modules>
28 |
29 |   <properties>
30 |     <docker.reg.username>daflow</docker.reg.username>
31 |     <docker.reg.password>daflow</docker.reg.password>
32 |     <docker.reg.email>abc@daflow.com</docker.reg.email> <!-- property name inferred -->
33 |
34 |     <docker.build.skip>true</docker.build.skip>
35 |
36 |     <docker.hive.version>2.3.3</docker.hive.version>
37 |     <docker.spark.version>2.4.3</docker.spark.version>
38 |     <docker.hadoop.version>2.8.4</docker.hadoop.version>
39 |
40 |     <docker.hadoop.hs.port>8188</docker.hadoop.hs.port>
41 |     <docker.hadoop.datanode.port>50075</docker.hadoop.datanode.port> <!-- property name inferred -->
42 |     <docker.hadoop.webHdfs.port>50070</docker.hadoop.webHdfs.port>
43 |
44 |     <docker.spark.hadoop.version>2.7</docker.spark.hadoop.version> <!-- property name inferred -->
45 |
46 |     <dockerfile.maven.version>1.4.10</dockerfile.maven.version>
47 |   </properties>
48 |
49 |   <build>
50 |     <extensions>
51 |       <extension>
52 |         <groupId>com.spotify</groupId>
53 |         <artifactId>dockerfile-maven-extension</artifactId>
54 |         <version>${dockerfile.maven.version}</version>
55 |       </extension>
56 |     </extensions>
57 |
58 |     <pluginManagement>
59 |       <plugins>
60 |         <plugin>
61 |           <groupId>com.spotify</groupId>
62 |           <artifactId>dockerfile-maven-plugin</artifactId>
63 |           <version>${dockerfile.maven.version}</version>
64 |           <configuration>
65 |             <!-- element names inferred from the three boolean values in the source -->
66 |             <skip>true</skip>
67 |             <pullNewerImage>false</pullNewerImage>
68 |             <useMavenSettingsForAuth>false</useMavenSettingsForAuth>
69 |           </configuration>
70 |         </plugin>
71 |       </plugins>
72 |     </pluginManagement>
73 |   </build>
74 | </project>
--------------------------------------------------------------------------------
/docker/images/spark/adhoc/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG HADOOP_VERSION=2.8.4
2 | ARG HIVE_VERSION=2.3.3
3 | ARG SPARK_VERSION=2.3.1
4 | FROM abhioncbr/daflow-spark-base:${SPARK_VERSION}
5 |
6 | COPY adhoc.sh /opt/spark
7 |
8 | ENV SPARK_WORKER_WEBUI_PORT 8081
9 | ENV SPARK_WORKER_LOG /spark/logs
10 | ENV SPARK_MASTER "spark://spark-master:7077"
11 |
12 | CMD ["/bin/bash", "/opt/spark/adhoc.sh"]
--------------------------------------------------------------------------------
/docker/images/spark/adhoc/adhoc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | . "/spark/sbin/spark-config.sh"
4 |
5 | . "/spark/bin/load-spark-env.sh"
6 |
7 |
8 | export SPARK_HOME=/opt/spark
9 |
10 | date
11 | echo "SPARK HOME is : $SPARK_HOME"
12 |
13 | tail -f /dev/null
14 |
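15 | # Example interactive use (assumed; container name taken from setup_demo.sh):
16 | #   docker exec -it daflow-adhoc-1 spark-shell --master $SPARK_MASTER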
--------------------------------------------------------------------------------
/docker/images/spark/adhoc/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <parent>
6 |     <groupId>com.abhioncbr.daflow</groupId>
7 |     <artifactId>daflow-docker</artifactId>
8 |     <version>${revision}</version>
9 |     <relativePath>../../pom.xml</relativePath>
10 |   </parent>
11 |
12 |   <packaging>pom</packaging>
13 |   <version>${daflow.docker.spark.adhoc.version}</version>
14 |   <modelVersion>4.0.0</modelVersion>
15 |   <artifactId>daflow-adhoc1</artifactId>
16 |   <description>DaFlow's Docker Image of Spark adhoc node.</description>
17 |
18 |   <properties>
19 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
20 |   </properties>
21 |
22 |   <dependencyManagement>
23 |     <dependencies>
24 |       <dependency>
25 |         <groupId>com.abhioncbr.daflow</groupId>
26 |         <artifactId>daflow-spark-base-docker</artifactId>
27 |         <version>${project.version}</version>
28 |         <type>pom</type>
29 |         <scope>import</scope>
30 |       </dependency>
31 |     </dependencies>
32 |   </dependencyManagement>
33 |
34 |   <build>
35 |     <plugins>
36 |       <plugin>
37 |         <groupId>com.spotify</groupId>
38 |         <artifactId>dockerfile-maven-plugin</artifactId>
39 |         <version>${dockerfile.maven.version}</version>
40 |         <executions>
41 |           <execution>
42 |             <id>tag-latest</id>
43 |             <phase>pre-integration-test</phase>
44 |             <goals>
45 |               <goal>build</goal>
46 |               <goal>tag</goal>
47 |               <goal>push</goal>
48 |             </goals>
49 |             <configuration>
50 |               <tag>latest</tag>
51 |               <skip>${docker.build.skip}</skip>
52 |               <useMavenSettingsForAuth>false</useMavenSettingsForAuth> <!-- element name inferred -->
53 |               <username>${docker.reg.username}</username>
54 |               <password>${docker.reg.password}</password>
55 |               <repository>abhioncbr/daflow-adhoc1</repository>
56 |               <buildArgs>
57 |                 <HIVE_VERSION>${docker.hive.version}</HIVE_VERSION>
58 |                 <SPARK_VERSION>${docker.spark.version}</SPARK_VERSION>
59 |                 <HADOOP_VERSION>${docker.hadoop.version}</HADOOP_VERSION>
60 |               </buildArgs>
61 |             </configuration>
62 |           </execution>
63 |           <execution>
64 |             <id>tag-version</id>
65 |             <phase>pre-integration-test</phase>
66 |             <goals>
67 |               <goal>build</goal>
68 |               <goal>tag</goal>
69 |               <goal>push</goal>
70 |             </goals>
71 |             <configuration>
72 |               <tag>${docker.spark.version}-${project.version}</tag>
73 |               <skip>${docker.build.skip}</skip>
74 |               <useMavenSettingsForAuth>false</useMavenSettingsForAuth>
75 |               <username>${docker.reg.username}</username>
76 |               <password>${docker.reg.password}</password>
77 |               <repository>abhioncbr/daflow-adhoc1</repository>
78 |               <buildArgs>
79 |                 <HIVE_VERSION>${docker.hive.version}</HIVE_VERSION>
80 |                 <SPARK_VERSION>${docker.spark.version}</SPARK_VERSION>
81 |                 <HADOOP_VERSION>${docker.hadoop.version}</HADOOP_VERSION>
82 |               </buildArgs>
83 |             </configuration>
84 |           </execution>
85 |         </executions>
86 |       </plugin>
87 |     </plugins>
88 |   </build>
89 | </project>
--------------------------------------------------------------------------------
/docker/images/spark/base/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG HADOOP_VERSION=2.8.4
2 | ARG HIVE_VERSION=2.3.3
3 |
4 | FROM abhioncbr/daflow-hive:${HADOOP_VERSION}-${HIVE_VERSION}
5 |
6 | ENV ENABLE_INIT_DAEMON true
7 | ENV INIT_DAEMON_BASE_URI http://identifier/init-daemon
8 | ENV INIT_DAEMON_STEP spark_master_init
9 |
10 | ARG SPARK_VERSION=2.4.3
11 | ARG SPARK_HADOOP_VERSION=2.7
12 |
13 | ENV SPARK_VERSION ${SPARK_VERSION}
14 | ENV HADOOP_VERSION ${SPARK_HADOOP_VERSION}
15 |
16 | COPY wait-for-step.sh /
17 | COPY execute-step.sh /
18 | COPY finish-step.sh /
19 |
20 | RUN echo "Installing Spark-version (${SPARK_VERSION})" \
21 | && wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
22 | && tar -xvzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
23 | && mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} /opt/spark \
24 | && rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
25 | && cd /
26 |
27 | #Give permission to execute scripts
28 | RUN chmod +x /wait-for-step.sh && chmod +x /execute-step.sh && chmod +x /finish-step.sh
29 |
30 | # Fix the value of PYTHONHASHSEED
31 | # Note: this is needed when you use Python 3.3 or greater
32 | ENV PYTHONHASHSEED 1
33 |
34 | ENV SPARK_HOME /opt/spark
35 | ENV SPARK_INSTALL ${SPARK_HOME}
36 | ENV SPARK_CONF_DIR ${SPARK_HOME}/conf
37 | ENV PATH $SPARK_INSTALL/bin:$PATH
38 |
39 | ENV SPARK_DRIVER_PORT 5001
40 | ENV SPARK_UI_PORT 5002
41 | ENV SPARK_BLOCKMGR_PORT 5003
42 |
43 | EXPOSE $SPARK_DRIVER_PORT $SPARK_UI_PORT $SPARK_BLOCKMGR_PORT
44 |
45 | # Without this jar, spark-shell fails to start; download it only if not already in $SPARK_INSTALL (-nc)
46 | RUN wget -nc -q -O "${SPARK_INSTALL}/jars/jersey-bundle-1.19.4.jar" "https://repo1.maven.org/maven2/com/sun/jersey/jersey-bundle/1.19.4/jersey-bundle-1.19.4.jar"
47 |
48 |
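49 | # A derived image can then talk to the standalone cluster, e.g. (assumed):
50 | #   spark-submit --master spark://spark-master:7077 <application-jar>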
--------------------------------------------------------------------------------
/docker/images/spark/base/execute-step.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ "$ENABLE_INIT_DAEMON" = "true" ]
4 | then
5 |   echo "Execute step ${INIT_DAEMON_STEP} in pipeline"
6 |   while true; do
7 |     sleep 5
8 |     echo -n '.'
9 |     # Poll until the init daemon acknowledges the step with HTTP 204
10 |     string=$(curl -sL -w "%{http_code}" -X PUT "$INIT_DAEMON_BASE_URI/execute?step=$INIT_DAEMON_STEP" -o /dev/null)
11 |     [ "$string" = "204" ] && break
12 |   done
13 |   echo "Notified execution of step ${INIT_DAEMON_STEP}"
14 | fi
15 |
--------------------------------------------------------------------------------
/docker/images/spark/base/finish-step.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ "$ENABLE_INIT_DAEMON" = "true" ]
4 | then
5 |   echo "Finish step ${INIT_DAEMON_STEP} in pipeline"
6 |   while true; do
7 |     sleep 5
8 |     echo -n '.'
9 |     # Poll until the init daemon acknowledges completion with HTTP 204
10 |     string=$(curl -sL -w "%{http_code}" -X PUT "$INIT_DAEMON_BASE_URI/finish?step=$INIT_DAEMON_STEP" -o /dev/null)
11 |     [ "$string" = "204" ] && break
12 |   done
13 |   echo "Notified finish of step ${INIT_DAEMON_STEP}"
14 | fi
15 |
--------------------------------------------------------------------------------
/docker/images/spark/base/wait-for-step.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ "$ENABLE_INIT_DAEMON" = "true" ]
4 | then
5 |   echo "Validating if step ${INIT_DAEMON_STEP} can start in pipeline"
6 |   while true; do
7 |     sleep 5
8 |     echo -n '.'
9 |     string=$(curl -s "$INIT_DAEMON_BASE_URI/canStart?step=$INIT_DAEMON_STEP")
10 |     [ "$string" = "true" ] && break
11 |   done
12 |   echo "Can start step ${INIT_DAEMON_STEP}"
13 | fi
14 |
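15 | # Together with execute-step.sh and finish-step.sh, this forms the init-daemon protocol:
16 | #   GET $INIT_DAEMON_BASE_URI/canStart?step=<step>  -> body "true" when the step may run
17 | #   PUT $INIT_DAEMON_BASE_URI/execute?step=<step>   -> HTTP 204 acknowledges the start
18 | #   PUT $INIT_DAEMON_BASE_URI/finish?step=<step>    -> HTTP 204 acknowledges completion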
--------------------------------------------------------------------------------
/docker/images/spark/master/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG HADOOP_VERSION=2.8.4
2 | ARG HIVE_VERSION=2.3.3
3 | ARG SPARK_VERSION=2.4.3
4 | FROM abhioncbr/daflow-spark-base:${SPARK_VERSION}
5 |
6 | COPY master.sh /opt/spark
7 |
8 | ENV SPARK_MASTER_PORT 7077
9 | ENV SPARK_MASTER_WEBUI_PORT 8080
10 | ENV SPARK_MASTER_LOG /opt/spark/logs
11 |
12 | EXPOSE 8080 7077 6066
13 |
14 | CMD ["/bin/bash", "/opt/spark/master.sh"]
15 |
--------------------------------------------------------------------------------
/docker/images/spark/master/master.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export SPARK_MASTER_HOST=`hostname`
4 |
5 | . "/opt/spark/sbin/spark-config.sh"
6 |
7 | . "/opt/spark/bin/load-spark-env.sh"
8 |
9 | mkdir -p $SPARK_MASTER_LOG
10 |
11 | export SPARK_HOME=/opt/spark
12 |
13 | ln -sf /dev/stdout $SPARK_MASTER_LOG/spark-master.out
14 |
15 | cd /opt/spark/bin && /opt/spark/sbin/../bin/spark-class org.apache.spark.deploy.master.Master \
16 | --ip $SPARK_MASTER_HOST --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT >> $SPARK_MASTER_LOG/spark-master.out
17 |
--------------------------------------------------------------------------------
/docker/images/spark/worker/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG HADOOP_VERSION=2.8.4
2 | ARG HIVE_VERSION=2.3.3
3 | ARG SPARK_VERSION=2.4.3
4 | FROM abhioncbr/daflow-spark-base:${SPARK_VERSION}
5 |
6 | COPY worker.sh /opt/spark
7 |
8 | ENV SPARK_WORKER_WEBUI_PORT 8081
9 | ENV SPARK_WORKER_LOG /spark/logs
10 | ENV SPARK_MASTER "spark://spark-master:7077"
11 |
12 | EXPOSE 8081
13 |
14 | CMD ["/bin/bash", "/opt/spark/worker.sh"]
15 |
--------------------------------------------------------------------------------
/docker/images/spark/worker/worker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | . "/spark/sbin/spark-config.sh"
4 |
5 | . "/spark/bin/load-spark-env.sh"
6 |
7 | mkdir -p $SPARK_WORKER_LOG
8 |
9 | export SPARK_HOME=/opt/spark
10 |
11 | ln -sf /dev/stdout ${SPARK_WORKER_LOG}/spark-worker.out
12 |
13 | date
14 | echo "SPARK HOME is : $SPARK_HOME"
15 | /opt/spark/sbin/../bin/spark-class org.apache.spark.deploy.worker.Worker \
16 | --webui-port ${SPARK_WORKER_WEBUI_PORT} ${SPARK_MASTER} >> ${SPARK_WORKER_LOG}/spark-worker.out
17 |
--------------------------------------------------------------------------------
/docker/scripts/setup_demo_container.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | hadoop fs -mkdir -p /user/root/daflow-examples/demo/sample-data
3 | hadoop fs -copyFromLocal -f /var/daflow/ws/daflow-examples/demo/sample-data/json_data.json /user/root/daflow-examples/demo/sample-data
4 | hadoop fs -mkdir -p /user/root/daflow-examples/demo/sample-data/daflow-result
5 |
--------------------------------------------------------------------------------
/docker/setup_demo.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Create host mount directories for the HDFS name and data volumes
4 | mkdir -p /tmp/daflow_hadoop_namenode
5 | mkdir -p /tmp/daflow_hadoop_datanode
6 |
7 | DAFLOW_ROOT=$(dirname "$PWD")
8 |
9 | # restart cluster
10 | DAFLOW_WS=${DAFLOW_ROOT} docker-compose -f compose/docker-compose-daflow.yml down
11 | DAFLOW_WS=${DAFLOW_ROOT} docker-compose -f compose/docker-compose-daflow.yml pull
12 | rm -rf /tmp/daflow_hadoop_datanode/*
13 | rm -rf /tmp/daflow_hadoop_namenode/*
14 | sleep 5
15 |
16 | DAFLOW_WS=${DAFLOW_ROOT} docker-compose -f compose/docker-compose-daflow.yml up -d
17 | sleep 15
18 |
19 | docker exec -it daflow-adhoc-1 /bin/bash /var/daflow/ws/docker/scripts/setup_demo_container.sh
--------------------------------------------------------------------------------
/docker/stop_demo.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # set up root directory
3 | DAFLOW_ROOT=$(dirname "$PWD")
4 |
5 | # shut down cluster
6 | DAFLOW_WS=${DAFLOW_ROOT} docker-compose -f compose/docker-compose-daflow.yml down
7 |
8 | # remove host mount directories
9 | rm -rf /tmp/daflow_hadoop_datanode
10 | rm -rf /tmp/daflow_hadoop_namenode
11 |
--------------------------------------------------------------------------------
/style/checkstyle-suppressions.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------