├── ansible
├── data
│ ├── group_vars
│ │ └── .gitkeep
│ ├── host_vars
│ │ └── .gitkeep
│ ├── roles
│ │ ├── common
│ │ │ ├── defaults
│ │ │ │ └── .gitkeep
│ │ │ ├── files
│ │ │ │ └── .gitkeep
│ │ │ ├── handlers
│ │ │ │ └── .gitkeep
│ │ │ ├── meta
│ │ │ │ └── .gitkeep
│ │ │ ├── vars
│ │ │ │ └── main.yml
│ │ │ ├── templates
│ │ │ │ └── motd
│ │ │ └── tasks
│ │ │ │ ├── main.yml
│ │ │ │ ├── motd.yml
│ │ │ │ ├── package.yml
│ │ │ │ └── oracle-jdk.yml
│ │ ├── docker
│ │ │ ├── meta
│ │ │ │ └── main.yml
│ │ │ └── tasks
│ │ │ │ └── main.yml
│ │ └── schema-registry
│ │ │ ├── meta
│ │ │ │ └── main.yml
│ │ │ ├── defaults
│ │ │ │ └── main.yml
│ │ │ ├── img
│ │ │ │ ├── ansible.png
│ │ │ │ └── draw-io-ansible.xml
│ │ │ ├── handlers
│ │ │ │ └── main.yml
│ │ │ ├── docker-compose-local.yml
│ │ │ ├── files
│ │ │ │ ├── log4j.properties
│ │ │ │ └── schema-registry.properties
│ │ │ ├── README.md
│ │ │ └── tasks
│ │ │ │ └── main.yml
│ ├── hosts
│ └── site.yml
├── destroy_ansible.sh
├── setup_share.sh
├── setup_ansible.sh
└── Vagrantfile
├── aws
└── emr
│ ├── application
│ ├── api
│ │ ├── __init__.py
│ │ ├── status_api.py
│ │ ├── emr_api.py
│ │ └── example_api.py
│ ├── service
│ │ ├── __init__.py
│ │ ├── emr_service.py
│ │ └── example_service.py
│ ├── templates
│ │ ├── page_not_found.html
│ │ └── hello.html
│ ├── static
│ │ └── example.txt
│ ├── __init__.py
│ ├── configuration.py
│ ├── main.py
│ └── logger.py
│ ├── setup.cfg
│ ├── .dockerignore
│ ├── MANIFEST.in
│ ├── dev.sh
│ ├── requirements.txt
│ ├── README.md
│ ├── Dockerfile
│ ├── setup.py
│ └── tests
│ └── application_test.py
├── hadoop
├── example
│ ├── map-reduce
│ │ ├── src
│ │ │ ├── test
│ │ │ │ └── java
│ │ │ │ │ └── .gitkeep
│ │ │ └── main
│ │ │ │ └── java
│ │ │ │ └── com
│ │ │ │ └── github
│ │ │ │ └── niqdev
│ │ │ │ ├── IntSumReducer.java
│ │ │ │ ├── TokenizerMapper.java
│ │ │ │ └── WordCount.java
│ │ ├── settings.gradle
│ │ ├── README.md
│ │ ├── gradle
│ │ │ └── wrapper
│ │ │ │ ├── gradle-wrapper.jar
│ │ │ │ └── gradle-wrapper.properties
│ │ ├── build.gradle
│ │ ├── gradlew.bat
│ │ └── gradlew
│ └── spark
│ │ ├── src
│ │ ├── test
│ │ │ └── scala
│ │ │ │ └── .gitkeep
│ │ └── main
│ │ │ └── scala
│ │ │ └── com
│ │ │ └── github
│ │ │ └── niqdev
│ │ │ └── App.scala
│ │ ├── project
│ │ │ ├── build.properties
│ │ │ └── Dependencies.scala
│ │ └── build.sbt
├── file
│ ├── hadoop
│ │ ├── config
│ │ │ ├── masters
│ │ │ ├── slaves
│ │ │ ├── core-site.xml
│ │ │ ├── hdfs-site.xml
│ │ │ ├── mapred-site.xml
│ │ │ ├── fair-scheduler.xml
│ │ │ └── yarn-site.xml
│ │ └── profile-hadoop.sh
│ ├── oozie
│ │ ├── profile-oozie.sh
│ │ └── config
│ │ │ ├── oozie-env.sh
│ │ │ └── oozie-site.xml
│ ├── spark
│ │ ├── profile-spark.sh
│ │ └── config
│ │ │ ├── spark-env.sh
│ │ │ ├── spark-defaults.conf
│ │ │ └── log4j.properties
│ ├── ssh
│ │ └── config
│ ├── zeppelin
│ │ ├── profile-zeppelin.sh
│ │ └── config
│ │ │ └── zeppelin-env.sh
│ ├── hosts
│ └── motd
├── script
│ ├── setup_zeppelin.sh
│ ├── bootstrap.sh
│ ├── setup_hadoop.sh
│ ├── setup_spark.sh
│ ├── setup_ubuntu.sh
│ └── setup_oozie.sh
├── Vagrantfile
└── vagrant_hadoop.sh
├── docs
├── img
│ ├── hdfs-read.png
│ ├── spark-job.png
│ ├── hdfs-write.png
│ ├── kafka-topic.png
│ ├── kafka-cluster.png
│ ├── kafka-consumer.png
│ ├── kafka-producer.png
│ ├── kubernetes-run.png
│ ├── map-reduce-job.png
│ ├── yarn-scheduler.png
│ ├── cassandra-memory.png
│ ├── cassandra-query.png
│ ├── kubernetes-rbac.png
│ ├── yarn-application.png
│ ├── cassandra-read-path.png
│ ├── kubernetes-client.png
│ ├── kubernetes-cluster.png
│ ├── kubernetes-volume.png
│ ├── spark-architecture.png
│ ├── cassandra-token-ring.png
│ ├── cassandra-write-path.png
│ ├── kafka-consumer-group.png
│ ├── kafka-rebalance-lost.png
│ ├── kubernetes-deployment.png
│ ├── map-reduce-data-flow.png
│ ├── kubernetes-architecture.png
│ ├── kubernetes-container-api.png
│ └── kafka-rebalance-duplicate.png
├── jvm.md
├── scala.md
├── index.md
├── azure.md
├── other-resources.md
├── zookeeper.md
├── programming.md
├── cloud.md
├── operating-system.md
├── docker.md
├── ansible.md
├── toolbox.md
├── cassandra.md
├── system-design.md
├── kafka.md
└── hadoop.md
├── .github
├── dependabot.yml
└── workflows
│ └── gh-pages.yml
├── miscellaneous
├── hello.c
└── setup_k8s.sh
├── cassandra
├── docker-compose.yml
├── cql
│ ├── all_users.csv
│ ├── column_users.csv
│ ├── example_create.cql
│ └── example_query.cql
└── docker-compose-cluster.yml
├── requirements.txt
├── base
├── supervisor.sed
└── Dockerfile
├── .gitignore
├── zookeeper
├── supervisor.ini
├── zoo.cfg
└── Dockerfile
├── kafka
├── supervisor-connect.ini
├── supervisor-kafka.ini
├── docker-compose.yml
├── Dockerfile
└── docker-compose-hub.yml
├── docs-todo
├── _aws.md
├── _neo4j.md
└── _spark.md
├── mkdocs.yml
├── README.md
└── dev.txt
/ansible/data/group_vars/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ansible/data/host_vars/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/aws/emr/application/api/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ansible/data/roles/common/defaults/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ansible/data/roles/common/files/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ansible/data/roles/common/handlers/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ansible/data/roles/common/meta/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/aws/emr/application/service/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/aws/emr/setup.cfg:
--------------------------------------------------------------------------------
1 | [aliases]
2 | test=pytest
--------------------------------------------------------------------------------
/aws/emr/.dockerignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.pyc
3 |
--------------------------------------------------------------------------------
/hadoop/example/map-reduce/src/test/java/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/hadoop/example/spark/src/test/scala/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/aws/emr/application/templates/page_not_found.html:
--------------------------------------------------------------------------------
1 | D'oh!
--------------------------------------------------------------------------------
/ansible/data/roles/common/vars/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | apt_cache: 3600
--------------------------------------------------------------------------------
/aws/emr/application/static/example.txt:
--------------------------------------------------------------------------------
1 | example-static-file
2 |
--------------------------------------------------------------------------------
/hadoop/file/hadoop/config/masters:
--------------------------------------------------------------------------------
1 | secondary-namenode.local
2 |
--------------------------------------------------------------------------------
/aws/emr/MANIFEST.in:
--------------------------------------------------------------------------------
1 | graft application/templates
2 | graft application/static
3 |
--------------------------------------------------------------------------------
/hadoop/example/spark/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=1.1.1
2 |
--------------------------------------------------------------------------------
/ansible/data/roles/docker/meta/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | dependencies:
3 | - common
--------------------------------------------------------------------------------
/hadoop/example/map-reduce/settings.gradle:
--------------------------------------------------------------------------------
1 | rootProject.name = 'map-reduce'
2 |
--------------------------------------------------------------------------------
/hadoop/file/hadoop/config/slaves:
--------------------------------------------------------------------------------
1 | node-1.local
2 | node-2.local
3 | node-3.local
4 |
--------------------------------------------------------------------------------
/ansible/data/roles/schema-registry/meta/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | dependencies:
3 | - common
--------------------------------------------------------------------------------
/docs/img/hdfs-read.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/hdfs-read.png
--------------------------------------------------------------------------------
/docs/img/spark-job.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/spark-job.png
--------------------------------------------------------------------------------
/docs/img/hdfs-write.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/hdfs-write.png
--------------------------------------------------------------------------------
/docs/img/kafka-topic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kafka-topic.png
--------------------------------------------------------------------------------
/docs/img/kafka-cluster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kafka-cluster.png
--------------------------------------------------------------------------------
/docs/img/kafka-consumer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kafka-consumer.png
--------------------------------------------------------------------------------
/docs/img/kafka-producer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kafka-producer.png
--------------------------------------------------------------------------------
/docs/img/kubernetes-run.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kubernetes-run.png
--------------------------------------------------------------------------------
/docs/img/map-reduce-job.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/map-reduce-job.png
--------------------------------------------------------------------------------
/docs/img/yarn-scheduler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/yarn-scheduler.png
--------------------------------------------------------------------------------
/docs/img/cassandra-memory.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/cassandra-memory.png
--------------------------------------------------------------------------------
/docs/img/cassandra-query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/cassandra-query.png
--------------------------------------------------------------------------------
/docs/img/kubernetes-rbac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kubernetes-rbac.png
--------------------------------------------------------------------------------
/docs/img/yarn-application.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/yarn-application.png
--------------------------------------------------------------------------------
/docs/img/cassandra-read-path.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/cassandra-read-path.png
--------------------------------------------------------------------------------
/docs/img/kubernetes-client.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kubernetes-client.png
--------------------------------------------------------------------------------
/docs/img/kubernetes-cluster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kubernetes-cluster.png
--------------------------------------------------------------------------------
/docs/img/kubernetes-volume.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kubernetes-volume.png
--------------------------------------------------------------------------------
/docs/img/spark-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/spark-architecture.png
--------------------------------------------------------------------------------
/docs/jvm.md:
--------------------------------------------------------------------------------
1 | # JVM
2 |
3 | Moved to scala-fp
4 |
--------------------------------------------------------------------------------
/docs/img/cassandra-token-ring.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/cassandra-token-ring.png
--------------------------------------------------------------------------------
/docs/img/cassandra-write-path.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/cassandra-write-path.png
--------------------------------------------------------------------------------
/docs/img/kafka-consumer-group.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kafka-consumer-group.png
--------------------------------------------------------------------------------
/docs/img/kafka-rebalance-lost.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kafka-rebalance-lost.png
--------------------------------------------------------------------------------
/docs/img/kubernetes-deployment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kubernetes-deployment.png
--------------------------------------------------------------------------------
/docs/img/map-reduce-data-flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/map-reduce-data-flow.png
--------------------------------------------------------------------------------
/docs/scala.md:
--------------------------------------------------------------------------------
1 | # Scala
2 |
3 | Moved to scala-fp
4 |
--------------------------------------------------------------------------------
/docs/img/kubernetes-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kubernetes-architecture.png
--------------------------------------------------------------------------------
/docs/img/kubernetes-container-api.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kubernetes-container-api.png
--------------------------------------------------------------------------------
/docs/img/kafka-rebalance-duplicate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/docs/img/kafka-rebalance-duplicate.png
--------------------------------------------------------------------------------
/hadoop/example/map-reduce/README.md:
--------------------------------------------------------------------------------
1 | # map-reduce-example
2 |
3 | ```
4 | ./gradlew clean build
5 | ./gradlew jar
6 | ```
7 |
--------------------------------------------------------------------------------
/hadoop/file/oozie/profile-oozie.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | export OOZIE_HOME=/usr/local/oozie
4 | export PATH=${OOZIE_HOME}/bin:${PATH}
5 |
--------------------------------------------------------------------------------
/hadoop/file/spark/profile-spark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | export SPARK_HOME=/usr/local/spark
4 | export PATH=${SPARK_HOME}/bin:${PATH}
5 |
--------------------------------------------------------------------------------
/ansible/data/roles/schema-registry/defaults/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | schema:
3 | registry:
4 | user: cp-schema-registry
5 | group: confluent
6 |
--------------------------------------------------------------------------------
/hadoop/file/ssh/config:
--------------------------------------------------------------------------------
1 | Host *
2 | StrictHostKeyChecking no
3 | UserKnownHostsFile=/dev/null
4 | NoHostAuthenticationForLocalhost yes
5 |
--------------------------------------------------------------------------------
/ansible/data/roles/common/templates/motd:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | echo '\nHost: {{ ansible_nodename }}'
4 | echo 'Groups: {{ group_names | join(', ') }}'
5 |
--------------------------------------------------------------------------------
/ansible/data/roles/schema-registry/img/ansible.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/ansible/data/roles/schema-registry/img/ansible.png
--------------------------------------------------------------------------------
/hadoop/file/zeppelin/profile-zeppelin.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | export ZEPPELIN_HOME=/usr/local/zeppelin
4 | export PATH=${ZEPPELIN_HOME}/bin:${PATH}
5 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "github-actions"
4 | directory: "/"
5 | schedule:
6 | interval: "daily"
7 |
--------------------------------------------------------------------------------
/hadoop/example/map-reduce/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/niqdev/devops/HEAD/hadoop/example/map-reduce/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/miscellaneous/hello.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 |
3 | int main() {
4 |   printf("Hello, World.\n");
5 | }
6 |
7 | // compile: cc -o hello hello.c
8 | // run: ./hello
9 |
--------------------------------------------------------------------------------
/ansible/data/roles/common/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | - import_tasks: package.yml
4 | tags:
5 | - package
6 |
7 | - import_tasks: motd.yml
8 | tags:
9 | - motd
10 |
--------------------------------------------------------------------------------
/hadoop/file/oozie/config/oozie-env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export OOZIE_BASE_PATH=/vol/oozie
4 | export OOZIE_DATA=${OOZIE_BASE_PATH}/data
5 | export OOZIE_LOG=${OOZIE_BASE_PATH}/log
6 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # DevOps
2 |
3 | A collection of notes, resources, documentation and POCs mainly related to distributed systems for local development, learning purposes and quick prototyping.
4 |
--------------------------------------------------------------------------------
/aws/emr/application/templates/hello.html:
--------------------------------------------------------------------------------
1 | <!doctype html>
2 | <title>Hello from Flask</title>
3 | {% if name %}
4 |   <h1>Hello {{ name }}!</h1>
5 | {% else %}
6 |   <h1>Hello, World!</h1>
7 | {% endif %}
--------------------------------------------------------------------------------
/hadoop/file/spark/config/spark-env.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | export SPARK_LOG_DIR=/vol/spark/log
4 | # fix warning in spark-shell
5 | export SPARK_LOCAL_IP=$(hostname -i | sed 's/^127.0.0.1 //')
6 |
--------------------------------------------------------------------------------
/cassandra/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | cassandra:
4 | container_name: devops-cassandra
5 | image: cassandra:3.11
6 | ports:
7 | - 9042:9042
8 | volumes:
9 | - ./cql:/cql
10 |
--------------------------------------------------------------------------------
/aws/emr/dev.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | rm -fr .eggs/ *.egg-info */__pycache__/ */*/__pycache__/
4 |
5 | source venv/bin/activate
6 |
7 | pip install -e .
8 |
9 | export FLASK_APP=application
10 | export FLASK_DEBUG=1
11 | flask run
12 |
--------------------------------------------------------------------------------
/aws/emr/application/__init__.py:
--------------------------------------------------------------------------------
1 | from flask import Flask
2 |
3 | app = Flask(__name__)
4 | app.config.from_object('application.configuration.Config')
5 | #app.config.from_envvar('APPLICATION_SETTINGS', silent=True)
6 |
7 | import application.main
8 |
--------------------------------------------------------------------------------
/ansible/data/roles/schema-registry/handlers/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | - name: restart schema-registry
4 | systemd:
5 | name: "{{ schema.registry.service_name }}"
6 | state: restarted
7 |
8 | - name: reload systemd
9 | command: systemctl daemon-reload
10 |
--------------------------------------------------------------------------------
/ansible/data/hosts:
--------------------------------------------------------------------------------
1 | [ansible]
2 | 192.168.100.10
3 |
4 | [cluster]
5 | #ip-192-168-100-11.local
6 | 192.168.100.11
7 | 192.168.100.12
8 | 192.168.100.13
9 |
10 | [docker]
11 | 192.168.100.11
12 | 192.168.100.12
13 |
14 | [schema-registry]
15 | 192.168.100.11
16 |
--------------------------------------------------------------------------------
/hadoop/file/zeppelin/config/zeppelin-env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export ZEPPELLIN_BASE_PATH=/vol/zeppelin
4 | export ZEPPELIN_LOG_DIR=${ZEPPELLIN_BASE_PATH}/log
5 | export ZEPPELIN_NOTEBOOK_DIR=${ZEPPELLIN_BASE_PATH}/notebook
6 |
7 | export ZEPPELIN_MEM="-Xms1024m -Xmx1024m"
8 |
--------------------------------------------------------------------------------
/aws/emr/requirements.txt:
--------------------------------------------------------------------------------
1 | astroid==1.6.1
2 | click==6.7
3 | Flask==1.1.1
4 | isort==4.3.4
5 | itsdangerous==0.24
6 | Jinja2==2.10.1
7 | lazy-object-proxy==1.3.1
8 | MarkupSafe==1.0
9 | mccabe==0.6.1
10 | pylint==1.8.2
11 | six==1.11.0
12 | Werkzeug==0.15.3
13 | wrapt==1.10.11
14 |
--------------------------------------------------------------------------------
/aws/emr/README.md:
--------------------------------------------------------------------------------
1 | # aws-emr
2 |
3 | ### Development
4 |
5 | ```
6 | # create
7 | virtualenv -p $(which python3) venv
8 |
9 | # activate virtualenv
10 | source venv/bin/activate
11 |
12 | # development script
13 | ./dev.sh
14 |
15 | # deactivate virtualenv
16 | deactivate
17 | ```
18 |
--------------------------------------------------------------------------------
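As a quick sanity check of the development workflow above (not part of the repository): once `./dev.sh` is running, the `/status` and `/info` routes defined in `application/api/status_api.py` can be probed with a few lines of Python, assuming the default host and port from `application/configuration.py`.

```python
# Hedged example: probe the running Flask app on the assumed default 127.0.0.1:5000.
import json
import urllib.request

BASE_URL = "http://127.0.0.1:5000"  # assumption: defaults from configuration.py

for path in ("/status", "/info"):
    with urllib.request.urlopen(f"{BASE_URL}{path}") as response:
        print(path, json.load(response))
```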
/requirements.txt:
--------------------------------------------------------------------------------
1 | click==7.1.2
2 | future==0.18.2
3 | Jinja2==2.11.3
4 | joblib==0.14.1
5 | livereload==2.6.1
6 | lunr==0.5.6
7 | Markdown==3.2.2
8 | MarkupSafe==1.1.1
9 | mkdocs==1.1
10 | nltk==3.5
11 | PyYAML==5.4
12 | regex==2020.5.7
13 | six==1.14.0
14 | tornado==6.0.4
15 | tqdm==4.46.0
16 |
--------------------------------------------------------------------------------
/base/supervisor.sed:
--------------------------------------------------------------------------------
1 | s/logfile=\/tmp\/supervisord.log/logfile=\/var\/log\/supervisord.log/
2 | s/pidfile=\/tmp\/supervisord.pid/pidfile=\/var\/run\/supervisord.pid/
3 | s/nodaemon=false/nodaemon=true/
4 | s/\;\[include\]/\[include\]/
5 | s/\;files = relative\/directory\/\*.ini/files = \/etc\/supervisor\/conf.d\/\*/
6 |
--------------------------------------------------------------------------------
/hadoop/example/map-reduce/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Tue Jan 23 20:22:38 GMT 2018
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-4.4.1-all.zip
7 |
--------------------------------------------------------------------------------
/docs/azure.md:
--------------------------------------------------------------------------------
1 | # Azure
2 |
3 | * ARM template [documentation](https://docs.microsoft.com/en-us/azure/azure-resource-manager/templates)
4 | * Azure Automation [documentation](https://docs.microsoft.com/en-us/azure/automation)
5 | * Azure Security Center [documentation](https://docs.microsoft.com/en-us/azure/security-center)
6 |
--------------------------------------------------------------------------------
/aws/emr/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.6
2 |
3 | WORKDIR /usr/src
4 |
5 | COPY requirements.txt ./
6 | RUN pip install --no-cache-dir -r requirements.txt
7 |
8 | COPY ./application ./application
9 |
10 | COPY setup.py setup.cfg MANIFEST.in ./
11 | RUN pip install --editable .
12 |
13 | CMD [ "python", "./application/main.py" ]
14 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | .DS_Store
3 |
4 | */.vagrant
5 |
6 | ansible/.share
7 | ansible/data/site.retry
8 | aws/*/logs/
9 | cassandra/.cassandra
10 | hadoop/.data/
11 |
12 | .gradle/
13 | build/
14 |
15 | __pycache__
16 | *.pyc
17 | .pytest_cache/
18 | venv/
19 | .eggs/
20 | *.egg-info
21 |
22 | *.iml
23 | .idea/
24 | target/
25 | .vscode/
26 | *.log
27 |
28 | site
29 |
--------------------------------------------------------------------------------
/aws/emr/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | setup(
4 | name='aws-emr',
5 | version='0.1',
6 | packages=['application'],
7 | include_package_data=True,
8 | install_requires=[
9 | 'flask',
10 | ],
11 | setup_requires=[
12 | 'pytest-runner',
13 | ],
14 | tests_require=[
15 | 'pytest',
16 | ],
17 | )
18 |
--------------------------------------------------------------------------------
/aws/emr/tests/application_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | class ApplicationTestCase(unittest.TestCase):
4 |
5 | def setUp(self):
6 | print('test setUp')
7 |
8 | def tearDown(self):
9 | print('test tearDown')
10 |
11 | def test_example(self):
12 | assert 'aaa' in 'aaa'
13 |
14 | if __name__ == '__main__':
15 | unittest.main()
16 |
--------------------------------------------------------------------------------
/zookeeper/supervisor.ini:
--------------------------------------------------------------------------------
1 | [program:zookeeper]
2 | command=/opt/zookeeper/bin/zkServer.sh start-foreground
3 | redirect_stderr=false
4 | stdout_logfile=/var/log/zookeeper/stdout
5 | stdout_logfile_maxbytes=0
6 | stderr_logfile=/var/log/zookeeper/stderr
7 | stderr_logfile_maxbytes=0
8 | stopsignal=INT
9 | numprocs_start=1
10 | startsecs=2
11 | autostart=true
12 | autorestart=true
13 |
--------------------------------------------------------------------------------
/hadoop/example/spark/build.sbt:
--------------------------------------------------------------------------------
1 | import Dependencies.{V, allDependencies}
2 |
3 | lazy val root = (project in file(".")).
4 | settings(
5 | inThisBuild(List(
6 | organization := "com.github.niqdev",
7 | scalaVersion := V.scala,
8 | version := "0.1.0-SNAPSHOT"
9 | )),
10 | name := "spark-github",
11 | libraryDependencies ++= allDependencies
12 | )
13 |
--------------------------------------------------------------------------------
/kafka/supervisor-connect.ini:
--------------------------------------------------------------------------------
1 | [program:connect]
2 | command=/opt/kafka/bin/connect-distributed.sh /opt/kafka/config/connect-distributed.properties
3 | redirect_stderr=false
4 | stdout_logfile=/var/log/connect/stdout
5 | stdout_logfile_maxbytes=0
6 | stderr_logfile=/var/log/connect/stderr
7 | stderr_logfile_maxbytes=0
8 | stopsignal=INT
9 | numprocs_start=1
10 | startsecs=2
11 | autostart=true
12 | autorestart=true
13 |
--------------------------------------------------------------------------------
/hadoop/file/spark/config/spark-defaults.conf:
--------------------------------------------------------------------------------
1 | spark.master yarn
2 | # TODO spark.yarn.jars hdfs://namenode.local:9000/user/spark/share/lib/*.jar
3 | # TODO spark.yarn.archive hdfs://namenode.local:9000/user/spark/share/spark-archive.zip
4 |
5 | # history server
6 | spark.eventLog.enabled true
7 | spark.eventLog.dir hdfs://namenode.local:9000/user/spark/log
8 | spark.history.fs.logDirectory hdfs://namenode.local:9000/user/spark/log
9 |
--------------------------------------------------------------------------------
/hadoop/example/map-reduce/build.gradle:
--------------------------------------------------------------------------------
1 | apply plugin: 'java-library'
2 | apply plugin: 'application'
3 |
4 | repositories {
5 | jcenter()
6 | }
7 |
8 | mainClassName = "com.github.niqdev.WordCount"
9 |
10 | jar {
11 | manifest {
12 | attributes 'Main-Class': "$mainClassName"
13 | }
14 | }
15 |
16 | dependencies {
17 | compile group: 'org.apache.hadoop', name: 'hadoop-client', version: '2.7.5'
18 | }
19 |
--------------------------------------------------------------------------------
/hadoop/file/hadoop/profile-hadoop.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | export HADOOP_HOME=/usr/local/hadoop
4 | export PATH=${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${PATH}
5 |
6 | export HADOOP_LOG_PATH=/vol/hadoop/log
7 | export HADOOP_LOG_DIR=${HADOOP_LOG_PATH}/hadoop
8 | export YARN_LOG_DIR=${HADOOP_LOG_PATH}/yarn
9 | export HADOOP_MAPRED_LOG_DIR=${HADOOP_LOG_PATH}/mapred
10 |
11 | # required by spark
12 | export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
13 |
--------------------------------------------------------------------------------
/kafka/supervisor-kafka.ini:
--------------------------------------------------------------------------------
1 | [program:kafka]
2 | command=/opt/kafka/bin/kafka-server-start.sh /opt/kafka/config/server.properties --override zookeeper.connect="%(ENV_ZOOKEEPER_HOSTS)s"
3 | redirect_stderr=false
4 | stdout_logfile=/var/log/kafka/stdout
5 | stdout_logfile_maxbytes=0
6 | stderr_logfile=/var/log/kafka/stderr
7 | stderr_logfile_maxbytes=0
8 | stopsignal=INT
9 | numprocs_start=1
10 | startsecs=2
11 | autostart=true
12 | autorestart=true
13 |
--------------------------------------------------------------------------------
/.github/workflows/gh-pages.yml:
--------------------------------------------------------------------------------
1 | name: github-pages
2 | on:
3 | push:
4 | branches:
5 | - master
6 |
7 | jobs:
8 | build:
9 | name: Deploy docs
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: Checkout main
13 | uses: actions/checkout@v3.2.0
14 |
15 | - name: Deploy docs
16 | uses: mhausenblas/mkdocs-deploy-gh-pages@nomaterial
17 | env:
18 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
19 |
--------------------------------------------------------------------------------
/aws/emr/application/configuration.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | class DefaultConfig(object):
4 | APP_NAME = 'aws-emr'
5 | LOG_PATH = 'logs/application.log'
6 | ENVIRONMENT = 'DEFAULT'
7 | DEBUG = False
8 | HTTP_HOST = '127.0.0.1'
9 | HTTP_PORT = 5000
10 |
11 | class Config(DefaultConfig):
12 | # docker doesn't forward 127.0.0.1
13 | HTTP_HOST = os.getenv('HTTP_HOST', '0.0.0.0')
14 | HTTP_PORT = int(os.getenv('HTTP_PORT', 5000))
15 |
--------------------------------------------------------------------------------
/aws/emr/application/api/status_api.py:
--------------------------------------------------------------------------------
1 | from application import app
2 |
3 | from flask import jsonify
4 |
5 | @app.route('/status')
6 | def status():
7 | app.logger.debug('status')
8 | return jsonify({
9 | 'status': 'OK'
10 | })
11 |
12 | @app.route('/info')
13 | def info():
14 | app.logger.debug('info')
15 | return jsonify({
16 | 'application': app.config['APP_NAME'],
17 | 'env': app.config['ENVIRONMENT']
18 | })
19 |
--------------------------------------------------------------------------------
/hadoop/file/hosts:
--------------------------------------------------------------------------------
1 | # hadoop hosts
2 | 172.16.0.10 master master.local namenode.local secondary-namenode.local resource-manager.local web-proxy.local history.local
3 | 172.16.0.10 spark.local spark-history.local zeppelin.local postgres.local oozie.local
4 | 172.16.0.101 node-1 node-1.local datanode-1.local node-manager-1.local
5 | 172.16.0.102 node-2 node-2.local datanode-2.local node-manager-2.local
6 | 172.16.0.103 node-3 node-3.local datanode-3.local node-manager-3.local
7 |
--------------------------------------------------------------------------------
/miscellaneous/setup_k8s.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | curl -Lo minikube https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64 \
4 | && chmod +x minikube \
5 | && sudo mv minikube /usr/local/bin/
6 |
7 | curl -Lo kubectl https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl \
8 | && chmod +x kubectl \
9 | && sudo mv kubectl /usr/local/bin/
10 |
--------------------------------------------------------------------------------
/ansible/data/roles/common/tasks/motd.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | # custom banner
4 | # https://ownyourbits.com/2017/04/05/customize-your-motd-login-message-in-debian-and-ubuntu/
5 |
6 | - name: remove help banner from motd
7 | become: yes
8 | file:
9 | path: /etc/update-motd.d/10-help-text
10 | state: absent
11 |
12 | - name: add custom banner to motd
13 | become: yes
14 | template:
15 | src: motd
16 | dest: /etc/update-motd.d/10-custom-text
17 | mode: 0755
18 |
--------------------------------------------------------------------------------
/ansible/destroy_ansible.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # unofficial bash strict mode
4 | set -euo pipefail
5 | IFS=$'\n\t'
6 |
7 | # run from any directory (no symlink allowed)
8 | CURRENT_PATH=$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd -P)
9 | cd ${CURRENT_PATH}
10 |
11 | echo "[+] destroy ansible"
12 |
13 | read -p "Are you sure? [y/n]" -n 1 -r
14 | echo
15 | if [[ $REPLY =~ ^[Yy]$ ]]
16 | then
17 | vagrant destroy -f
18 |
19 | rm -frv \
20 | .vagrant \
21 | .share
22 | fi
23 |
24 | echo "[-] destroy ansible"
25 |
--------------------------------------------------------------------------------
/ansible/data/site.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | - name: test
4 | hosts: all
5 | tasks:
6 | - name: test uptime
7 | shell: uptime
8 | tags:
9 | - test
10 |
11 | - name: common setup
12 | hosts: cluster
13 | roles:
14 | - common
15 | tags:
16 | - common
17 |
18 | - name: docker setup
19 | hosts: docker
20 | roles:
21 | - docker
22 | tags:
23 | - docker
24 |
25 | - name: schema registry setup
26 | hosts: schema-registry
27 | roles:
28 | - schema-registry
29 | tags:
30 | - schema-registry
31 |
--------------------------------------------------------------------------------
/hadoop/file/hadoop/config/core-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <configuration>
3 |
4 |   <property>
5 |     <name>fs.defaultFS</name>
6 |     <value>hdfs://namenode.local:9000</value>
7 |   </property>
8 |
9 |   <property>
10 |     <name>hadoop.proxyuser.hadoop.hosts</name>
11 |     <value>*</value>
12 |   </property>
13 |   <property>
14 |     <name>hadoop.proxyuser.hadoop.groups</name>
15 |     <value>*</value>
16 |   </property>
17 |
18 | </configuration>
19 |
--------------------------------------------------------------------------------
/hadoop/file/motd:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | echo "\nHostname: \033[1;31m$(hostname -s)\033[0m"
4 | echo "Uptime:$(uptime)\n"
5 |
6 | echo '* master: 172.16.0.10'
7 | echo '* node-1: 172.16.0.101\n'
8 |
9 | echo '* NameNode: http://namenode.local:50070'
10 | echo '* ResourceManager: http://resource-manager.local:8088'
11 | echo '* MapReduce Job History Server: http://history.local:19888'
12 | echo '* DataNode/NodeManager (1): http://node-1.local:8042/node\n'
13 |
14 | echo '* Spark: http://spark.local:4040'
15 | echo '* Zeppelin: http://zeppelin.local:8080'
16 | echo '* Oozie: http://oozie.local:11000'
17 |
--------------------------------------------------------------------------------
/ansible/setup_share.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # unofficial bash strict mode
4 | set -euo pipefail
5 | IFS=$'\n\t'
6 |
7 | # run from any directory (no symlink allowed)
8 | CURRENT_PATH=$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd -P)
9 | cd ${CURRENT_PATH}
10 |
11 | echo "[+] setup share"
12 |
13 | SHARE_PATH="$CURRENT_PATH/.share"
14 | SSH_PATH="$SHARE_PATH/ssh"
15 |
16 | echo "share path: $SHARE_PATH"
17 |
18 | rm -fr ${SHARE_PATH}
19 | mkdir -p ${SHARE_PATH}/node-{1,2,3} ${SSH_PATH}
20 |
21 | ssh-keygen -t rsa -b 4096 -C "ansible" -N "" -f "$SSH_PATH/ansible_rsa"
22 |
23 | echo "[-] setup share"
24 |
--------------------------------------------------------------------------------
/kafka/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | services:
4 | zookeeper:
5 | container_name: devops-zookeeper
6 | build:
7 | context: ../zookeeper
8 | args:
9 | - VERSION=3.4.12
10 | ports:
11 | - 12181:2181
12 | networks:
13 | - devops_network
14 | kafka:
15 | container_name: devops-kafka
16 | build: .
17 | depends_on:
18 | - zookeeper
19 | ports:
20 | - 19092:9092
21 | networks:
22 | - devops_network
23 | environment:
24 | - ZOOKEEPER_HOSTS="zookeeper:2181"
25 |
26 | networks:
27 | devops_network:
28 |
--------------------------------------------------------------------------------
/zookeeper/zoo.cfg:
--------------------------------------------------------------------------------
1 | # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html
2 |
3 | # The number of milliseconds of each tick
4 | tickTime=2000
5 | # The number of ticks that the initial synchronization phase can take
6 | initLimit=10
7 | # The number of ticks that can pass between sending a request and getting an acknowledgement
8 | syncLimit=5
9 | # The directory where the snapshot is stored
10 | dataDir=/var/lib/zookeeper/data
11 | # The port at which the clients will connect
12 | clientPort=2181
13 | # Write the transaction log to the dataLogDir rather than the dataDir
14 | dataLogDir=/var/log/zookeeper
15 |
--------------------------------------------------------------------------------
/hadoop/file/hadoop/config/hdfs-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <configuration>
3 |
4 |   <property>
5 |     <name>dfs.namenode.name.dir</name>
6 |     <value>file:///vol/hadoop/namenode</value>
7 |   </property>
8 |   <property>
9 |     <name>dfs.namenode.checkpoint.dir</name>
10 |     <value>file:///vol/hadoop/secondary</value>
11 |   </property>
12 |   <property>
13 |     <name>dfs.datanode.data.dir</name>
14 |     <value>file:///vol/hadoop/datanode</value>
15 |   </property>
16 |
17 | </configuration>
--------------------------------------------------------------------------------
/ansible/data/roles/common/tasks/package.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | - name: update & upgrade
4 | become: yes
5 | apt:
6 | update_cache: yes
7 | cache_valid_time: "{{ apt_cache }}"
8 | upgrade: dist
9 |
10 | - name: install common packages
11 | become: yes
12 | apt:
13 | name:
14 | - jq
15 | - tree
16 | - httpie
17 | state: present
18 | update_cache: yes
19 | cache_valid_time: "{{ apt_cache }}"
20 |
21 | - import_tasks: oracle-jdk.yml
22 | tags:
23 | - oracle-jdk
24 |
25 | - name: cleanup
26 | become: yes
27 | apt:
28 | autoclean: yes
29 | autoremove: yes
30 |
--------------------------------------------------------------------------------
/aws/emr/application/main.py:
--------------------------------------------------------------------------------
1 | from application import app
2 | from application.logger import Logger
3 |
4 | Logger().init()
5 |
6 | # api
7 | import application.api.status_api
8 | import application.api.example_api
9 | import application.api.emr_api
10 |
11 | # if run with cli this is NOT executed
12 | if __name__ == '__main__':
13 | app.logger.info('start application: [{0}] @ {1}:{2} in DEBUG={3}'.format(
14 | app.config['APP_NAME'], app.config['HTTP_HOST'], app.config['HTTP_PORT'], app.config['DEBUG']))
15 | app.run(host=app.config['HTTP_HOST'], port=app.config['HTTP_PORT'], debug=app.config['DEBUG'])
16 |
--------------------------------------------------------------------------------
/hadoop/example/spark/project/Dependencies.scala:
--------------------------------------------------------------------------------
1 | import sbt._
2 |
3 | object Dependencies {
4 |
5 | lazy val N = new {
6 | val spark = "org.apache.spark"
7 | }
8 |
9 | lazy val V = new {
10 | val scala = "2.11.12"
11 |
12 | val spark = "2.2.1"
13 |
14 | val scalatest = "3.0.5"
15 | }
16 |
17 | lazy val libDependencies = Seq(
18 | N.spark %% "spark-core" % V.spark % Provided,
19 | N.spark %% "spark-sql" % V.spark % Provided
20 | )
21 |
22 | lazy val testDependencies = Seq(
23 | "org.scalatest" %% "scalatest" % V.scalatest % Test
24 | )
25 |
26 | lazy val allDependencies = libDependencies ++ testDependencies
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/aws/emr/application/service/emr_service.py:
--------------------------------------------------------------------------------
1 | from application import app
2 |
3 | class EmrService(object):
4 |
5 | def create_cluster(self):
6 | app.logger.debug('TODO create_cluster')
7 | return {
8 | 'instance_id': 'TODO_INSTANCE_ID'
9 | }
10 |
11 | def destroy_cluster(self):
12 | app.logger.debug('TODO destroy_cluster')
13 | return {
14 | 'instance_id': 'TODO_INSTANCE_ID'
15 | }
16 |
17 | def info_cluster(self):
18 | app.logger.debug('TODO info_cluster')
19 | return {
20 | 'instance_id': 'TODO_INSTANCE_ID',
21 | 'name': 'TODO_NAME'
22 | }
23 |
--------------------------------------------------------------------------------
/cassandra/cql/all_users.csv:
--------------------------------------------------------------------------------
1 | firstNameCsvAll1;"{'home': {street: 'street1'; city: 'city1'; state: 'STATE'; zip_code: 12345}}";;"{'csv1a@example.com'; 'csv1b@example.com'}";True;;lastNameCsv1;;
2 | firstNameCsvAll2;"{'home': {street: 'street1'; city: 'city1'; state: 'STATE'; zip_code: 12345}}";;"{'csv2a@example.com'; 'csv2b@example.com'}";True;;lastNameCsv2;;
3 | firstNameCsvAll3;"{'home': {street: 'street1'; city: 'city1'; state: 'STATE'; zip_code: 12345}}";;"{'csv3a@example.com'; 'csv3b@example.com'}";False;;lastNameCsv3;;
4 | firstNameCsvAll4;"{'home': {street: 'street1'; city: 'city1'; state: 'STATE'; zip_code: 12345}}";;"{'csv4a@example.com'; 'csv4b@example.com'}";False;;lastNameCsv4;;
5 |
--------------------------------------------------------------------------------
/ansible/data/roles/schema-registry/docker-compose-local.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | services:
4 |
5 | zookeeper:
6 | container_name: my-local-zookeeper
7 | image: niqdev/zookeeper:3.4.13
8 | ports:
9 | - 2181:2181
10 | hostname: zookeeper
11 | networks:
12 | - my_local_network
13 |
14 | kafka:
15 | container_name: my-local-kafka
16 | image: niqdev/kafka:2.0.0
17 | depends_on:
18 | - zookeeper
19 | ports:
20 | - 9092:9092
21 | - 8083:8083
22 | hostname: kafka
23 | networks:
24 | - my_local_network
25 | environment:
26 | - ZOOKEEPER_HOSTS="zookeeper:2181"
27 |
28 | networks:
29 | my_local_network:
30 |
--------------------------------------------------------------------------------
/ansible/setup_ansible.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # unofficial bash strict mode
4 | set -euo pipefail
5 | IFS=$'\n\t'
6 |
7 | # run from any directory (no symlink allowed)
8 | CURRENT_PATH=$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd -P)
9 | cd ${CURRENT_PATH}
10 |
11 | echo "[+] setup ansible"
12 |
13 | sudo apt-add-repository ppa:ansible/ansible
14 | sudo apt-get update
15 |
16 | sudo apt-get install -y \
17 | software-properties-common \
18 | ansible
19 |
20 | # http://docs.ansible.com/ansible/latest/intro_getting_started.html#host-key-checking
21 | sudo sed -i -r "s/#host_key_checking = False/host_key_checking = False/" /etc/ansible/ansible.cfg
22 |
23 | echo "[-] setup ansible"
24 |
--------------------------------------------------------------------------------
/docs-todo/_aws.md:
--------------------------------------------------------------------------------
1 | # AWS
2 |
3 | > TODO
4 |
5 | Documentation
6 |
7 | * [Boto 3](https://boto3.readthedocs.io/en/latest/reference/services/index.html)
8 |
9 | ## CLI
10 |
11 | TODO
12 |
13 | ## Setup
14 |
15 | Build `devops/aws-emr` image
16 | ```bash
17 | # change path
18 | cd devops/aws/emr
19 |
20 | # build image
21 | docker build -t devops/aws-emr .
22 |
23 | # start temporary container [port=HOST:CONTAINER]
24 | docker run \
25 | --rm \
26 | -e HTTP_PORT=8080 \
27 | -p 5000:8080 \
28 | --name aws-emr \
29 | devops/aws-emr:latest
30 |
31 | # access container
32 | docker exec -it aws-emr bash
33 | ```
34 |
35 | ### S3
36 |
37 | TODO
38 |
39 | ### EMR
40 |
41 | TODO
42 |
--------------------------------------------------------------------------------
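The EMR section above is still a TODO. As a rough, non-authoritative sketch of where it might go, the stubbed methods in `application/service/emr_service.py` could be backed by Boto 3 (already referenced in the documentation links); the region and cluster id below are placeholders, not values from the repository.

```python
# Illustrative sketch only: possible boto3 calls behind the EmrService stubs.
import boto3

emr = boto3.client("emr", region_name="eu-west-1")  # region is an assumption

def info_cluster(cluster_id):
    """Return the same shape as EmrService.info_cluster, but from a real cluster."""
    cluster = emr.describe_cluster(ClusterId=cluster_id)["Cluster"]
    return {"instance_id": cluster["Id"], "name": cluster["Name"]}

def destroy_cluster(cluster_id):
    """Terminate the cluster; the EMR API refers to clusters as 'job flows'."""
    emr.terminate_job_flows(JobFlowIds=[cluster_id])
    return {"instance_id": cluster_id}
```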
/ansible/data/roles/common/tasks/oracle-jdk.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | - name: add java repository
4 | tags:
5 | - oracle-jdk
6 | become: yes
7 | apt_repository:
8 | repo: 'ppa:linuxuprising/java'
9 | state: present
10 |
11 | - name: accept oracle license
12 | tags:
13 | - oracle-jdk
14 | become: yes
15 | debconf: name='oracle-java11-installer' question='shared/accepted-oracle-license-v1-2' value='true' vtype='select'
16 |
17 | - name: install java
18 | tags:
19 | - oracle-jdk
20 | become: yes
21 | apt:
22 | name: "{{ packages }}"
23 | state: latest
24 | vars:
25 | packages:
26 | - oracle-java11-installer
27 | - oracle-java11-set-default
28 |
--------------------------------------------------------------------------------
/cassandra/cql/column_users.csv:
--------------------------------------------------------------------------------
1 | first_name,last_name,addresses,emails,enable
2 | firstNameCsv1,lastNameCsv1,"{'home': {street: 'street1', city: 'city1', state: 'STATE', zip_code: 12345}}","{'csv1a@example.com', 'csv1b@example.com'}",True
3 | firstNameCsv2,lastNameCsv2,"{'home': {street: 'street1', city: 'city1', state: 'STATE', zip_code: 12345}}","{'csv2a@example.com', 'csv2b@example.com'}",True
4 | firstNameCsv3,lastNameCsv3,"{'home': {street: 'street1', city: 'city1', state: 'STATE', zip_code: 12345}}","{'csv3a@example.com', 'csv3b@example.com'}",False
5 | firstNameCsv4,lastNameCsv4,"{'home': {street: 'street1', city: 'city1', state: 'STATE', zip_code: 12345}}","{'csv4a@example.com', 'csv4b@example.com'}",False
6 |
--------------------------------------------------------------------------------
/ansible/data/roles/docker/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | - name: create docker group
4 | become: yes
5 | group:
6 | name: docker
7 | state: present
8 |
9 | - name: create docker user
10 | become: yes
11 | user:
12 | name: docker
13 | shell: /bin/bash
14 | groups: docker,sudo
15 | append: yes
16 |
17 | - name: install docker
18 | become: yes
19 | #become_user: docker
20 | command: 'bash -c "curl -fsSL https://get.docker.com/ | sh"'
21 |
22 | - name: install docker-compose
23 | become: yes
24 | #become_user: docker
25 | get_url:
26 | url: "https://github.com/docker/compose/releases/download/1.22.0/docker-compose-Linux-x86_64"
27 | dest: /usr/local/bin/docker-compose
28 | mode: +x
29 |
--------------------------------------------------------------------------------
/zookeeper/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM devops/base:latest
2 | #FROM niqdev/phusion-base:latest
3 |
4 | ARG VERSION=3.5.5
5 |
6 | ENV ZOOKEEPER_HOME "/opt/zookeeper"
7 | ENV PATH "$ZOOKEEPER_HOME/bin:$PATH"
8 |
9 | RUN apt-get install -y \
10 | telnet \
11 | netcat && \
12 | apt-get clean
13 |
14 | RUN curl https://www-eu.apache.org/dist/zookeeper/zookeeper-${VERSION}/apache-zookeeper-${VERSION}-bin.tar.gz | tar -xzf - -C /opt && \
15 | mv /opt/apache-zookeeper-${VERSION}-bin /opt/zookeeper-${VERSION} && \
16 | ln -s /opt/zookeeper-${VERSION} /opt/zookeeper && \
17 | mkdir -p /var/log/zookeeper /var/lib/zookeeper/data
18 |
19 | ADD zoo.cfg /opt/zookeeper/conf/zoo.cfg
20 | ADD supervisor.ini /etc/supervisor/conf.d/zookeeper.conf
21 |
--------------------------------------------------------------------------------
/base/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM phusion/baseimage:latest-amd64
2 |
3 | RUN apt-get update && apt-get upgrade -y
4 | RUN add-apt-repository ppa:openjdk-r/ppa -y
5 |
6 | RUN apt-get update && apt-get install -y \
7 | iputils-ping \
8 | python2.7 \
9 | python-pip \
10 | httpie \
11 | jq \
12 | openjdk-8-jdk && \
13 | apt-get clean
14 |
15 | ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
16 |
17 | RUN pip install --upgrade pip wheel setuptools supervisor
18 |
19 | ADD supervisor.sed /tmp/supervisor.sed
20 |
21 | RUN echo_supervisord_conf > /etc/supervisord.conf && \
22 | sed -i -r -f /tmp/supervisor.sed /etc/supervisord.conf && \
23 | mkdir -p /etc/supervisor/conf.d
24 |
25 | CMD ["supervisord", "-c", "/etc/supervisord.conf", "-n"]
26 |
--------------------------------------------------------------------------------
/docs-todo/_neo4j.md:
--------------------------------------------------------------------------------
1 | # Neo4j
2 |
3 | > TODO
4 |
5 | * [Graph Databases](TODO) (2015) by Ian Robinson, Jim Webber, and Emil Eifrem (Book)
6 |
7 | Graph databases help leverage complex and dynamic relationships in highly connected data to generate insight and competitive advantage. Connected data is data whose interpretation and value require users first to understand the ways in which its constituent elements are related.
8 |
9 | > https://github.com/iansrobinson/graph-databases-use-cases
10 |
11 | **What Is a Graph?**
12 |
13 | A graph is just a collection of vertices and edges or, in different words, a set of nodes and the relationships that connect them. Graphs represent entities as nodes and the ways in which those entities relate to the world as relationships.
14 |
--------------------------------------------------------------------------------
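To make the graph model above concrete, here is a tiny illustration (not from the book or the repository) of entities as nodes and named relationships as edges; the node and relationship names are made up.

```python
# Minimal property-graph sketch: nodes carry properties, edges carry a relationship type.
nodes = {
    "alice": {"label": "Person"},
    "graph-databases": {"label": "Book"},
}
edges = [
    ("alice", "READS", "graph-databases"),
]

# Traversal is just following edges from a starting node.
for source, relationship, target in edges:
    print(f"({source}) -[:{relationship}]-> ({target})")
```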
/hadoop/example/map-reduce/src/main/java/com/github/niqdev/IntSumReducer.java:
--------------------------------------------------------------------------------
1 | package com.github.niqdev;
2 |
3 | import org.apache.hadoop.io.IntWritable;
4 | import org.apache.hadoop.io.Text;
5 | import org.apache.hadoop.mapreduce.Reducer;
6 |
7 | import java.io.IOException;
8 |
9 | public class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
10 |
11 | private IntWritable result = new IntWritable();
12 |
13 | @Override
14 | protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
15 | int sum = 0;
16 | for (IntWritable value : values) {
17 | sum += value.get();
18 | }
19 | result.set(sum);
20 | context.write(key, result);
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: DevOps
2 | site_author: niqdev
3 | repo_url: https://github.com/niqdev/devops
4 | theme: readthedocs
5 | google_analytics: ['UA-68888222-4', 'niqdev.github.io']
6 |
7 | nav:
8 | - Linux: linux.md
9 | - Docker: docker.md
10 | - Ansible: ansible.md
11 | - Cassandra: cassandra.md
12 | - ZooKeeper: zookeeper.md
13 | - Kafka: kafka.md
14 | - Hadoop: hadoop.md
15 | - Cloud: cloud.md
16 | - Kubernetes: kubernetes.md
17 | - System Design: system-design.md
18 | - Operating System: operating-system.md
19 | - Programming: programming.md
20 | - Other Resources: other-resources.md
21 | - Toolbox: toolbox.md
22 | - JVM (OLD): jvm.md
23 | - Scala (OLD): scala.md
24 |
25 | # disable search plugin
26 | #plugins: []
27 |
--------------------------------------------------------------------------------
/hadoop/file/hadoop/config/mapred-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <configuration>
3 |
4 |   <property>
5 |     <name>mapreduce.framework.name</name>
6 |     <value>yarn</value>
7 |   </property>
8 |
9 |   <property>
10 |     <name>mapreduce.jobhistory.address</name>
11 |     <value>history.local:10020</value>
12 |   </property>
13 |   <property>
14 |     <name>mapreduce.jobhistory.webapp.address</name>
15 |     <value>history.local:19888</value>
16 |   </property>
17 |
18 |   <property>
19 |     <name>yarn.app.mapreduce.am.staging-dir</name>
20 |     <value>/mr-history</value>
21 |   </property>
22 |
23 | </configuration>
24 |
--------------------------------------------------------------------------------
/aws/emr/application/logger.py:
--------------------------------------------------------------------------------
1 | from application import app
2 |
3 | import os
4 | import logging
5 | from logging.handlers import TimedRotatingFileHandler
6 |
7 | class Logger(object):
8 |
9 | def __init__(self):
10 | self.log_path = app.config['LOG_PATH']
11 |
12 | def init(self):
13 | # create directory if doesn't exist
14 | os.makedirs(os.path.dirname(self.log_path), exist_ok=True)
15 |
16 | formatter = logging.Formatter("[%(asctime)s][%(levelname)s][%(pathname)s:%(lineno)d] %(message)s")
17 | handler = TimedRotatingFileHandler(self.log_path, when='midnight', interval=1, backupCount=5)
18 | handler.setLevel(logging.DEBUG)
19 | handler.setFormatter(formatter)
20 |
21 | app.logger.addHandler(handler)
22 | app.logger.setLevel(logging.DEBUG)
23 | app.logger.debug('init logger')
24 |
--------------------------------------------------------------------------------
/hadoop/file/hadoop/config/fair-scheduler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
9 |
10 | 60.0
11 | 0.8
12 | 120
13 |
14 |
15 | 40.0
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/hadoop/example/map-reduce/src/main/java/com/github/niqdev/TokenizerMapper.java:
--------------------------------------------------------------------------------
1 | package com.github.niqdev;
2 |
3 | import org.apache.hadoop.io.IntWritable;
4 | import org.apache.hadoop.io.Text;
5 | import org.apache.hadoop.mapreduce.Mapper;
6 |
7 | import java.io.IOException;
8 | import java.util.StringTokenizer;
9 |
10 | public class TokenizerMapper extends Mapper