├── deployment ├── ansible │ ├── files │ │ ├── aws │ │ │ ├── .githolder │ │ │ └── keys.json │ │ └── gcp │ │ │ └── .githolder │ ├── roles │ │ ├── h2o │ │ │ ├── handlers │ │ │ │ └── main.yml │ │ │ ├── templates │ │ │ │ └── systemd │ │ │ │ │ └── h2o.service.j2 │ │ │ └── tasks │ │ │ │ └── main.yml │ │ ├── nifi │ │ │ ├── handlers │ │ │ │ └── main.yml │ │ │ ├── templates │ │ │ │ ├── systemd │ │ │ │ │ └── nifi.service.j2 │ │ │ │ ├── nifi-env.sh.j2 │ │ │ │ ├── bootstrap.conf.j2 │ │ │ │ └── nifi.properties.j2 │ │ │ └── tasks │ │ │ │ └── main.yml │ │ ├── openrefine │ │ │ ├── handlers │ │ │ │ └── main.yml │ │ │ ├── templates │ │ │ │ └── systemd │ │ │ │ │ └── openrefine.service.j2 │ │ │ └── tasks │ │ │ │ └── main.yml │ │ ├── superset │ │ │ ├── handlers │ │ │ │ └── main.yml │ │ │ ├── templates │ │ │ │ ├── superset_config │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── superset_config.py.j2 │ │ │ │ ├── superset_custom_config.pth │ │ │ │ ├── systemd │ │ │ │ │ └── superset.service.j2 │ │ │ │ └── superset.service.j2 │ │ │ └── tasks │ │ │ │ └── main.yml │ │ ├── common-server │ │ │ ├── templates │ │ │ │ ├── timezone.j2 │ │ │ │ ├── locale.j2 │ │ │ │ └── limits.j2 │ │ │ ├── handlers │ │ │ │ └── main.yml │ │ │ └── tasks │ │ │ │ └── main.yml │ │ ├── java │ │ │ ├── files │ │ │ │ └── webupd8team-java.list │ │ │ └── tasks │ │ │ │ ├── add-repository-on-ubuntu.yml │ │ │ │ ├── add-repository-on-debian.yml │ │ │ │ └── main.yml │ │ ├── anaconda │ │ │ └── tasks │ │ │ │ ├── templates │ │ │ │ ├── systemd │ │ │ │ │ └── anaconda.service.j2 │ │ │ │ └── jupyter_notebook_config.py.j2 │ │ │ │ └── main.yml │ │ ├── ec2_common │ │ │ └── tasks │ │ │ │ └── main.yml │ │ └── ec2_instance │ │ │ └── tasks │ │ │ └── main.yml │ ├── setup_h2o.yml │ ├── setup_nifi.yml │ ├── setup_anaconda.yml │ ├── setup_openrefine.yml │ ├── setup_superset.yml │ ├── hosts │ ├── ansible.cfg │ ├── setup_on_vps.yml │ ├── setup_on_vbox.yml │ ├── group_vars │ │ └── all │ │ │ └── vars_file.yml │ ├── setup_gc_instance.yml │ └── setup_on_aws.yml └── 
docker │ ├── h2o │ ├── .dockerignore │ ├── templates │ │ └── supervisord.conf │ └── Dockerfile │ ├── nifi │ ├── .dockerignore │ ├── config │ │ └── vars_file.yml │ ├── templates │ │ ├── supervisord.conf │ │ ├── nifi-env.sh.j2 │ │ ├── bootstrap.conf.j2 │ │ └── nifi.properties.j2 │ └── Dockerfile │ ├── jupyter │ ├── .dockerignore │ ├── Dockerfile │ └── templates │ │ └── jupyter_notebook_config.py.j2 │ ├── superset │ ├── .dockerignore │ ├── templates │ │ ├── superset │ │ │ └── superset_config.py │ │ └── create-user.sh │ ├── config │ │ └── vars_file.yml │ └── Dockerfile │ ├── openrefine │ ├── .dockerignore │ ├── config │ │ └── vars_file.yml │ ├── templates │ │ └── supervisord.conf │ ├── start.sh │ └── Dockerfile │ ├── docker-compose.yaml │ ├── docker-stack-prod.yaml │ └── docker-stack-dev.yaml ├── .gitignore └── vagrant └── Vagrantfile /deployment/ansible/files/aws/.githolder: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deployment/ansible/files/gcp/.githolder: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deployment/ansible/roles/h2o/handlers/main.yml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deployment/ansible/roles/nifi/handlers/main.yml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deployment/ansible/roles/openrefine/handlers/main.yml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deployment/ansible/roles/superset/handlers/main.yml: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deployment/ansible/roles/superset/templates/superset_config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deployment/ansible/roles/common-server/templates/timezone.j2: -------------------------------------------------------------------------------- 1 | Europe/London 2 | -------------------------------------------------------------------------------- /deployment/ansible/roles/java/files/webupd8team-java.list: -------------------------------------------------------------------------------- 1 | deb http://ppa.launchpad.net/webupd8team/java/ubuntu xenial main -------------------------------------------------------------------------------- /deployment/docker/h2o/.dockerignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | publish-local.sh 3 | publish-prod.sh -------------------------------------------------------------------------------- /deployment/docker/nifi/.dockerignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | publish-local.sh 3 | publish-prod.sh -------------------------------------------------------------------------------- /deployment/ansible/roles/common-server/templates/locale.j2: -------------------------------------------------------------------------------- 1 | LANG="en_US.UTF-8" 2 | LANGUAGE="en_US:en" 3 | LC_ALL="en_US.UTF-8" -------------------------------------------------------------------------------- /deployment/docker/jupyter/.dockerignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 
| publish-local.sh 3 | publish-prod.sh -------------------------------------------------------------------------------- /deployment/docker/superset/.dockerignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | publish-local.sh 3 | publish-prod.sh -------------------------------------------------------------------------------- /deployment/docker/openrefine/.dockerignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | publish-local.sh 3 | publish-prod.sh -------------------------------------------------------------------------------- /deployment/ansible/files/aws/keys.json: -------------------------------------------------------------------------------- 1 | { 2 | "aws_access_key" : "COPY_ACCESS_KEY_HERE", 3 | "aws_secret_key" : "COPY_SECRET_KEY_HERE" 4 | } -------------------------------------------------------------------------------- /deployment/ansible/setup_h2o.yml: -------------------------------------------------------------------------------- 1 | - name: Setup superset 2 | hosts: Cluster 3 | become: yes 4 | become_user: root 5 | roles: 6 | - h2o 7 | -------------------------------------------------------------------------------- /deployment/ansible/setup_nifi.yml: -------------------------------------------------------------------------------- 1 | - name: Setup superset 2 | hosts: Cluster 3 | become: yes 4 | become_user: root 5 | roles: 6 | - nifi 7 | -------------------------------------------------------------------------------- /deployment/docker/openrefine/config/vars_file.yml: -------------------------------------------------------------------------------- 1 | openrefine : 2 | host: 3 | ip : "0.0.0.0" 4 | port: "33332" 5 | java: 6 | Xmx: "2g" -------------------------------------------------------------------------------- /deployment/ansible/setup_anaconda.yml: 
-------------------------------------------------------------------------------- 1 | - name: Setup superset 2 | hosts: Cluster 3 | become: yes 4 | become_user: root 5 | roles: 6 | - anaconda 7 | -------------------------------------------------------------------------------- /deployment/ansible/setup_openrefine.yml: -------------------------------------------------------------------------------- 1 | - name: Setup superset 2 | hosts: Cluster 3 | become: yes 4 | become_user: root 5 | roles: 6 | - openrefine 7 | -------------------------------------------------------------------------------- /deployment/ansible/setup_superset.yml: -------------------------------------------------------------------------------- 1 | - name: Setup superset 2 | hosts: Cluster 3 | become: yes 4 | become_user: root 5 | roles: 6 | - superset 7 | -------------------------------------------------------------------------------- /deployment/docker/nifi/config/vars_file.yml: -------------------------------------------------------------------------------- 1 | nifi : 2 | host: 3 | ip : "0.0.0.0" 4 | port: "33331" 5 | java: 6 | Xmx: "2g" 7 | log_dir: "/var/log/nifi" -------------------------------------------------------------------------------- /deployment/ansible/hosts: -------------------------------------------------------------------------------- 1 | [Cluster] 2 | vds.verteego.local vname=vds.verteego.local localip=192.168.90.123 ansible_host=localhost ansible_ssh_port=2222 ansible_user=vagrant -------------------------------------------------------------------------------- /deployment/ansible/ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | allow_world_readable_tmpfiles = True 3 | host_key_checking = False 4 | remote_tmp = /tmp 5 | control_path = %(directory)s/%%h-%%r -------------------------------------------------------------------------------- /deployment/docker/nifi/templates/supervisord.conf: 
-------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | 4 | [program:nifi] 5 | user=nifi 6 | startsecs = 0 7 | autorestart = false 8 | command= /opt/nifi/bin/nifi.sh run -------------------------------------------------------------------------------- /deployment/ansible/roles/superset/templates/superset_custom_config.pth: -------------------------------------------------------------------------------- 1 | # This file shoud be added to site-packages of virtualenv so that superset_config can be loaded at superset launch 2 | /opt/superset/superset_config -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vagrant 3 | .bundle 4 | *.retry 5 | /deployment/ansible/files/*.json 6 | vagrant_1.8.0_x86_64.deb 7 | venv 8 | configuration 9 | ansible.json 10 | *.pem 11 | publish-local.sh 12 | publish-prod.sh -------------------------------------------------------------------------------- /deployment/ansible/roles/common-server/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # handlers 3 | - name: reload sysctl 4 | command: sysctl -p 5 | 6 | - name: update timezone 7 | command: dpkg-reconfigure --frontend noninteractive tzdata 8 | 9 | -------------------------------------------------------------------------------- /deployment/docker/openrefine/templates/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | 4 | [program:openrefine] 5 | user=openrefine 6 | startsecs = 0 7 | autorestart = false 8 | command= /opt/openrefine/refine -p PORT -i HOST -------------------------------------------------------------------------------- /deployment/docker/h2o/templates/supervisord.conf: 
-------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | 4 | [program:h2o] 5 | user=h2o 6 | startsecs = 0 7 | autorestart = false 8 | command= java -Xmx500m -jar /opt/h2o/h2o.jar -port 33333 -log_dir /var/log/h2o -------------------------------------------------------------------------------- /deployment/ansible/setup_on_vps.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: VPS vds Install 3 | hosts: all 4 | gather_facts: False 5 | become: True 6 | roles: 7 | - common-server 8 | - java 9 | - nifi 10 | - openrefine 11 | - superset 12 | - h2o 13 | - anaconda -------------------------------------------------------------------------------- /deployment/ansible/roles/nifi/templates/systemd/nifi.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Nifi server 3 | [Service] 4 | User=nifi 5 | Group=nifi 6 | PIDFile=/var/run/nifi.pid 7 | ExecStart=/opt/nifi/bin/nifi.sh run 8 | ExecStop=/opt/nifi/bin/nifi.sh stop 9 | [Install] 10 | WantedBy=multi-user.target 11 | -------------------------------------------------------------------------------- /deployment/docker/jupyter/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jupyter/datascience-notebook 2 | 3 | ENV JUPYTER_PORT "33335" 4 | 5 | COPY templates/jupyter_notebook_config.py.j2 /home/jovyan/.jupyter/jupyter_notebook_config.py 6 | RUN sed -i s/PORT/${JUPYTER_PORT}/ /home/jovyan/.jupyter/jupyter_notebook_config.py 7 | 8 | EXPOSE $JUPYTER_PORT 9 | ENTRYPOINT ["start-notebook.sh"] -------------------------------------------------------------------------------- /deployment/ansible/roles/h2o/templates/systemd/h2o.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=H2O server 3 | [Service] 4 | User=h2o 5 | Group=h2o 6 | 
PIDFile=/var/run/h2o.pid 7 | ExecStart=/usr/bin/java -Xmx{{ h2o.java.Xmx }} -jar /opt/h2o/h2o.jar -port {{ h2o.host.port }} -log_dir {{ h2o.log_dir }} 8 | ExecStop=/bin/kill -HUP $MAINPID 9 | [Install] 10 | WantedBy=multi-user.target 11 | -------------------------------------------------------------------------------- /deployment/ansible/roles/superset/templates/systemd/superset.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Superset 3 | [Service] 4 | User=superset 5 | Group=superset 6 | PIDFile=/var/run/superset.pid 7 | ExecStart=/opt/superset/venv/bin/superset runserver -p {{ superset.host.port }} -a {{ superset.host.ip }} 8 | ExecStop=/bin/kill -HUP $MAINPID 9 | [Install] 10 | WantedBy=multi-user.target 11 | -------------------------------------------------------------------------------- /deployment/ansible/roles/openrefine/templates/systemd/openrefine.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Openrefine server 3 | [Service] 4 | User=openrefine 5 | Group=openrefine 6 | PIDFile=/var/run/openrefine.pid 7 | ExecStart=/opt/openrefine/refine -p {{ openrefine.host.port }} -i {{ openrefine.host.ip }} 8 | ExecStop=/bin/kill -HUP $MAINPID 9 | [Install] 10 | WantedBy=multi-user.target 11 | -------------------------------------------------------------------------------- /deployment/ansible/roles/java/tasks/add-repository-on-ubuntu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Update package list 3 | apt: update_cache=yes cache_valid_time=3600 4 | become: yes 5 | 6 | - name: Install add-apt-repostory 7 | become: yes 8 | apt: name=software-properties-common state=latest 9 | 10 | - name: Add Oracle Java Repository 11 | become: yes 12 | apt_repository: repo='ppa:webupd8team/java' -------------------------------------------------------------------------------- 
/deployment/docker/superset/templates/superset/superset_config.py: -------------------------------------------------------------------------------- 1 | NIFI_PORT = "{{ nifi.host.port }}" 2 | NIFI_URI = "{{ superset.nifi.uri }}" 3 | OPENREFINE_PORT = "{{ openrefine.host.port }}" 4 | OPENREFINE_URI = "{{ superset.openrefine.uri }}" 5 | ANACONDA_PORT = "{{ anaconda.host.port }}" 6 | ANACONDA_URI = "{{ superset.anaconda.uri }}" 7 | H2O_PORT = "{{ h2o.host.port }}" 8 | H2O_URI = "{{ superset.h2o.uri }}" -------------------------------------------------------------------------------- /deployment/ansible/roles/anaconda/tasks/templates/systemd/anaconda.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Anaconda server 3 | [Service] 4 | User=anaconda 5 | Group=anaconda 6 | PIDFile=/var/run/anaconda.pid 7 | Environment="PATH=/opt/anaconda/anaconda3/bin:/usr/local/bin:/usr/bin:/bin" 8 | ExecStart=/opt/anaconda/anaconda3/bin/jupyter-notebook 9 | ExecStop=/bin/kill -HUP $MAINPID 10 | [Install] 11 | WantedBy=multi-user.target 12 | -------------------------------------------------------------------------------- /deployment/ansible/roles/superset/templates/superset_config/superset_config.py.j2: -------------------------------------------------------------------------------- 1 | # vds apps config 2 | NIFI_PORT = "{{ nifi.host.port }}" 3 | NIFI_URI = "{{ superset.nifi.uri }}" 4 | 5 | OPENREFINE_PORT = "{{ openrefine.host.port }}" 6 | OPENREFINE_URI = "{{ superset.openrefine.uri }}" 7 | 8 | ANACONDA_PORT = "{{ anaconda.host.port }}" 9 | ANACONDA_URI = "{{ superset.anaconda.uri }}" 10 | 11 | H2O_PORT = "{{ h2o.host.port }}" 12 | H2O_URI = "{{ superset.h2o.uri }}" 13 | -------------------------------------------------------------------------------- /deployment/ansible/roles/java/tasks/add-repository-on-debian.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Add 
source.list file 3 | copy: src=webupd8team-java.list dest=/etc/apt/sources.list.d/webupd8team-java.list 4 | register: add_source_result 5 | become: yes 6 | 7 | - name: Add apt-key 8 | apt_key: keyserver=hkp://keyserver.ubuntu.com:80 id=EEA14886 9 | become: yes 10 | 11 | - name: Update apt cache 12 | apt: update_cache=yes 13 | when: add_source_result.changed == true 14 | become: yes -------------------------------------------------------------------------------- /deployment/docker/openrefine/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$REFINE_MEMORY" ] ; then 4 | TOTAL_MEMORY=`free -b | grep Mem | awk '{print $2}'` 5 | MIN_REFINE_MEMORY=$(( $TOTAL_MEMORY - 3 * 1024 * 1024 * 1024 )) 6 | REFINE_MEMORY=$(( $TOTAL_MEMORY * 6 / 10 )) 7 | 8 | if [ "$REFINE_MEMORY" -lt "$MIN_REFINE_MEMORY" ]; then 9 | REFINE_MEMORY="$MIN_REFINE_MEMORY" 10 | fi 11 | fi 12 | 13 | exec ./refine -i HOST -p PORT -d /mnt/refine -m $REFINE_MEMORY -------------------------------------------------------------------------------- /deployment/ansible/roles/java/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - include: add-repository-on-debian.yml 3 | 4 | - name: Accept Java 8 License 5 | become: yes 6 | debconf: name='oracle-java8-installer' question='shared/accepted-oracle-license-v1-1' value='true' vtype='select' 7 | 8 | - name: Install Oracle Java 8 9 | become: yes 10 | apt: name={{item}} state=latest update_cache=yes cache_valid_time=3600 11 | with_items: 12 | - oracle-java8-installer 13 | - ca-certificates 14 | - oracle-java8-set-default -------------------------------------------------------------------------------- /deployment/docker/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | superset: 4 | image: vdshub/superset 5 | ports: 6 | - "33330:8088" 7 | 8 | nifi: 9 | 
image: vdshub/nifi 10 | ports: 11 | - "33331:33331" 12 | 13 | openrefine: 14 | image: vdshub/openrefine 15 | command: /opt/openrefine/start.sh 16 | ports: 17 | - "33332:33332" 18 | 19 | jupyter: 20 | image: vdshub/jupyter 21 | ports: 22 | - "33335:33335" 23 | 24 | h2o: 25 | image: vdshub/h2o 26 | ports: 27 | - "33333:33333" 28 | command: supervisord -------------------------------------------------------------------------------- /deployment/ansible/setup_on_vbox.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Hostname setup 3 | hosts: Cluster 4 | gather_facts: False 5 | become: True 6 | tasks: 7 | - name: Hostname 8 | hostname: name={{ vname }} 9 | - name: Reverse lookup hostname 10 | lineinfile: 11 | dest: /etc/hosts 12 | state: present 13 | regexp: '^127.0.0.1\s+localhost' 14 | line: '127.0.0.1 localhost' 15 | 16 | - name: Common setup 17 | hosts: Cluster 18 | gather_facts: False 19 | become: True 20 | roles: 21 | - common-server 22 | - java 23 | - nifi 24 | - openrefine 25 | - superset 26 | - h2o 27 | - anaconda -------------------------------------------------------------------------------- /vagrant/Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | # Vagrantfile API/syntax version. Don't touch unless you know what you're doing! 
5 | VAGRANTFILE_API_VERSION = "2" 6 | 7 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| 8 | 9 | config.hostmanager.manage_host = true 10 | config.hostmanager.enabled = true 11 | config.ssh.insert_key = false 12 | config.ssh.private_key_path = ['~/.vagrant.d/insecure_private_key'] 13 | 14 | config.vm.define "vds" do |vds| 15 | vds.vm.box = "debian/jessie64" 16 | vds.disksize.size = '20GB' 17 | vds.vm.network "private_network", ip: "192.168.90.123" 18 | vds.vm.hostname = "vds.verteego.local" 19 | vds.vm.provider :virtualbox do |vb| 20 | vb.customize ["modifyvm", :id, "--memory", "6144"] 21 | vb.customize ["modifyvm", :id, "--cpus", "4"] 22 | end 23 | end 24 | 25 | end -------------------------------------------------------------------------------- /deployment/ansible/group_vars/all/vars_file.yml: -------------------------------------------------------------------------------- 1 | superset: 2 | admin: 3 | username : "vds-user" 4 | first_name : "vds" 5 | last_name : "user" 6 | email : "vds-user@verteego.com" 7 | password : "verteego" 8 | host: 9 | ip : "0.0.0.0" 10 | port: "33330" 11 | nifi: 12 | uri: "nifi" 13 | h2o: 14 | uri: "" 15 | openrefine: 16 | uri: "" 17 | anaconda: 18 | uri: "tree" 19 | 20 | nifi : 21 | host: 22 | ip : "0.0.0.0" 23 | port: "33331" 24 | java: 25 | Xmx: "2g" 26 | log_dir: "/var/log/nifi" 27 | 28 | openrefine : 29 | host: 30 | ip : "0.0.0.0" 31 | port: "33332" 32 | java: 33 | Xmx: "2g" 34 | 35 | h2o : 36 | host: 37 | ip : "0.0.0.0" 38 | port: "33333" 39 | java: 40 | Xmx: "2g" 41 | log_dir: "/var/log/h2o" 42 | 43 | anaconda : 44 | host: 45 | ip : "0.0.0.0" 46 | port: "33335" 47 | -------------------------------------------------------------------------------- /deployment/docker/superset/config/vars_file.yml: -------------------------------------------------------------------------------- 1 | superset: 2 | admin: 3 | username : "vds-user" 4 | first_name : "vds" 5 | last_name : "user" 6 | email : "vds-user@verteego.com" 7 | password : 
"verteego" 8 | host: 9 | ip : "0.0.0.0" 10 | port: "33330" 11 | nifi: 12 | uri: "nifi" 13 | h2o: 14 | uri: "" 15 | openrefine: 16 | uri: "" 17 | anaconda: 18 | uri: "tree" 19 | 20 | nifi : 21 | host: 22 | ip : "0.0.0.0" 23 | port: "33331" 24 | java: 25 | Xmx: "2g" 26 | log_dir: "/var/log/nifi" 27 | 28 | openrefine : 29 | host: 30 | ip : "0.0.0.0" 31 | port: "33332" 32 | java: 33 | Xmx: "2g" 34 | 35 | h2o : 36 | host: 37 | ip : "0.0.0.0" 38 | port: "33333" 39 | java: 40 | Xmx: "2g" 41 | log_dir: "/var/log/h2o" 42 | 43 | anaconda : 44 | host: 45 | ip : "0.0.0.0" 46 | port: "33335" 47 | -------------------------------------------------------------------------------- /deployment/docker/superset/templates/create-user.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env expect 2 | 3 | set timeout 120 4 | spawn fabmanager create-admin --app superset 5 | set username "{{superset.admin.username}}" 6 | set first_name "{{superset.admin.first_name}}" 7 | set last_name "{{superset.admin.last_name}}" 8 | set email "{{superset.admin.email}}" 9 | set password "{{superset.admin.password}}" 10 | expect { 11 | "Username \\\[admin\\\]:" { send "$username\n";} 12 | } 13 | sleep 2 14 | expect { 15 | "User first name \\\[admin\\\]:" { send "$first_name\n";} 16 | } 17 | sleep 2 18 | expect { 19 | "User last name \\\[user\\\]:" { send "$last_name\n";} 20 | } 21 | sleep 2 22 | expect { 23 | "Email \\\[admin@fab.org\\\]:" { send "$email\n";} 24 | } 25 | sleep 2 26 | expect { 27 | "Password" { send "$password\n";} 28 | } 29 | sleep 2 30 | expect { 31 | "Repeat for confirmation:" { send "$password\n"; } 32 | } 33 | expect eof -------------------------------------------------------------------------------- /deployment/ansible/roles/h2o/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Setup working directory 3 | file: 4 | owner: h2o 5 | group: h2o 6 | mode: u=rwx,g=rx,o=rx 7 | 
state: directory 8 | path: /opt/h2o 9 | 10 | - name: Setup logging directory 11 | file: 12 | owner: h2o 13 | group: h2o 14 | mode: u=rwx,g=rx,o=rx 15 | state: directory 16 | path: /var/log/h2o 17 | 18 | - unarchive: 19 | src: https://storage.googleapis.com/verteego-vds/h2o/build/h2o.tar.gz 20 | dest: /opt/h2o 21 | owner: h2o 22 | group: h2o 23 | remote_src: True 24 | 25 | - name: Copy h2o service script 26 | template: src={{item.src}} dest={{item.dest}} mode='u=rwx,g=rx,o=rx' 27 | with_items: 28 | - { src: 'templates/systemd/h2o.service.j2', dest: '/lib/systemd/system/h2o.service'} 29 | 30 | - name: enable h2o service at startup 31 | systemd: 32 | name: h2o 33 | enabled: yes 34 | masked: no 35 | 36 | - systemd: 37 | state: restarted 38 | daemon_reload: yes 39 | name: h2o -------------------------------------------------------------------------------- /deployment/docker/h2o/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM java:8-jre-alpine 2 | MAINTAINER s3ni0r 3 | 4 | ENV H2O_URL https://storage.googleapis.com/verteego-vds/h2o/build/h2o.tar.gz 5 | ENV H2O_HOME /opt/h2o 6 | ENV H2O_LOG_DIR /var/log/h2o 7 | ENV H2O_PORT 33333 8 | ENV PYTHON_VERSION=2.7.12-r0 9 | ENV PY_PIP_VERSION=8.1.2-r0 10 | ENV SUPERVISOR_VERSION=3.3.1 11 | 12 | WORKDIR $H2O_HOME 13 | 14 | RUN apk update && apk add -u python=$PYTHON_VERSION py-pip=$PY_PIP_VERSION 15 | RUN pip install supervisor==$SUPERVISOR_VERSION 16 | 17 | RUN set -xe \ 18 | && apk add --no-cache bash curl tar \ 19 | && curl -sSL ${H2O_URL} | tar xz \ 20 | && apk del curl tar \ 21 | && addgroup -g 1000 h2o \ 22 | && adduser -h ${H2O_HOME} -s /bin/bash -G h2o -u 1000 -D -H h2o \ 23 | && mkdir -p ${H2O_LOG_DIR} \ 24 | && chown -R h2o:h2o ${H2O_LOG_DIR} \ 25 | && chown -R h2o:h2o ${H2O_HOME} 26 | 27 | COPY templates/supervisord.conf /etc/supervisord.conf 28 | 29 | EXPOSE ${H2O_PORT} 30 | 31 | #USER h2o 32 | 33 | CMD ["supervisord", "--nodaemon", "--configuration", 
"/etc/supervisord.conf"] -------------------------------------------------------------------------------- /deployment/ansible/roles/openrefine/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Setup working directory 3 | file: 4 | owner: openrefine 5 | group: openrefine 6 | mode: u=rwx,g=rx,o=rx 7 | state: directory 8 | path: /opt/openrefine 9 | 10 | - name: Setup logging directory 11 | file: 12 | owner: openrefine 13 | group: openrefine 14 | mode: u=rwx,g=rx,o=rx 15 | state: directory 16 | path: /var/log/openrefine 17 | 18 | - unarchive: 19 | extra_opts: ['--strip=1'] 20 | src: https://storage.googleapis.com/verteego-vds/openrefine/dist/openrefine-linux-1.0.tar.gz 21 | owner: openrefine 22 | group: openrefine 23 | dest: /opt/openrefine 24 | remote_src: True 25 | 26 | - name: Copy openrefine service script 27 | template: src={{item.src}} dest={{item.dest}} mode='u=rwx,g=rx,o=rx' 28 | with_items: 29 | - { src: 'templates/systemd/openrefine.service.j2', dest: '/lib/systemd/system/openrefine.service'} 30 | 31 | - name: enable openrefine service at startup 32 | systemd: 33 | name: openrefine 34 | enabled: yes 35 | masked: no 36 | 37 | - systemd: 38 | state: restarted 39 | daemon_reload: yes 40 | name: openrefine 41 | -------------------------------------------------------------------------------- /deployment/docker/nifi/templates/nifi-env.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | # The java implementation to use. 20 | #export JAVA_HOME=/usr/java/jdk1.8.0/ 21 | 22 | export NIFI_HOME=$(cd "${SCRIPT_DIR}" && cd .. && pwd) 23 | 24 | #The directory for the NiFi pid file 25 | export NIFI_PID_DIR="${NIFI_HOME}/run" 26 | 27 | #The directory for NiFi log files 28 | export NIFI_LOG_DIR="{{ nifi.log_dir }}" -------------------------------------------------------------------------------- /deployment/ansible/roles/nifi/templates/nifi-env.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | # The java implementation to use. 
20 | #export JAVA_HOME=/usr/java/jdk1.8.0/ 21 | 22 | export NIFI_HOME=$(cd "${SCRIPT_DIR}" && cd .. && pwd) 23 | 24 | #The directory for the NiFi pid file 25 | export NIFI_PID_DIR="${NIFI_HOME}/run" 26 | 27 | #The directory for NiFi log files 28 | export NIFI_LOG_DIR="{{ nifi.log_dir }}" -------------------------------------------------------------------------------- /deployment/docker/openrefine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM java:8 2 | MAINTAINER s3ni0r 3 | 4 | ENV OPENREFINE_HOME /opt/openrefine 5 | ENV OPENREFINE_PORT 33332 6 | ENV OPENREFINE_HOST 0.0.0.0 7 | 8 | #RUN echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections 9 | #RUN echo debconf shared/accepted-oracle-license-v1-1 seen true | /usr/bin/debconf-set-selections 10 | # 11 | #RUN apt-get -y -q update; \ 12 | # apt-get -y -q install wget software-properties-common supervisor 13 | # 14 | #RUN add-apt-repository 'deb http://ppa.launchpad.net/webupd8team/java/ubuntu precise main'; \ 15 | # apt-get -y -q update 16 | # 17 | #RUN apt-get install -y --force-yes -q oracle-java7-installer 18 | 19 | RUN useradd -ms /bin/bash openrefine 20 | RUN mkdir -p ${OPENREFINE_HOME} && \ 21 | chmod 755 ${OPENREFINE_HOME} && \ 22 | chown openrefine ${OPENREFINE_HOME} && \ 23 | chgrp openrefine ${OPENREFINE_HOME} 24 | 25 | # download and "mount" OpenRefine 26 | WORKDIR ${OPENREFINE_HOME} 27 | RUN wget -O - --no-check-certificate https://storage.googleapis.com/verteego-vds/openrefine/dist/openrefine-linux-1.0.tar.gz | tar -xz --strip=1 28 | #COPY templates/supervisord.conf /etc/supervisord.conf 29 | COPY ./start.sh ${OPENREFINE_HOME}/start.sh 30 | RUN chmod +x ${OPENREFINE_HOME}/start.sh 31 | RUN sed -i s/PORT/${OPENREFINE_PORT}/ ${OPENREFINE_HOME}/start.sh 32 | RUN sed -i s/HOST/${OPENREFINE_HOST}/ ${OPENREFINE_HOME}/start.sh 33 | 34 | EXPOSE ${OPENREFINE_PORT} 35 | 36 | CMD ["start.sh"] 
---
# Shared AWS networking for the VDS stack: one VPC with a public and a
# private subnet, an internet gateway, and a security group that is wide
# open inside the VPC but exposes only SSH to the outside.
# NOTE(review): the ec2_vpc module is deprecated in newer Ansible releases
# (split into ec2_vpc_net / ec2_vpc_subnet / ...) -- confirm the pinned
# Ansible version before upgrading.
- name: Create a VPC
  ec2_vpc:
    aws_access_key: "{{ aws_access_key }}"
    aws_secret_key: "{{ aws_secret_key }}"
    cidr_block: 10.0.0.0/16
    dns_hostnames: yes
    dns_support: yes
    instance_tenancy: default
    internet_gateway: yes
    region: "{{ region }}"
    resource_tags: { "Environment":"Development", "Name" :"vds_vpc"}
    route_tables:
      # Default route for the public subnet goes through the internet gateway.
      - subnets:
          - 10.0.0.0/24
        routes:
          - dest: 0.0.0.0/0
            gw: igw
    state: present
    subnets:
      - cidr: 10.0.0.0/24
        resource_tags: { "Environment":"Development", "Name" : "Public subnet" }
      - cidr: 10.0.1.0/24
        resource_tags: { "Environment":"Development", "Name" : "Private subnet" }
    wait: true
  register: vpc

- name: Create a security group
  ec2_group:
    aws_access_key: "{{ aws_access_key }}"
    aws_secret_key: "{{ aws_secret_key }}"
    name: "{{ security_group }}"
    description: Security Group for spark servers
    region: "{{ region }}"
    vpc_id: "{{ vpc.vpc_id }}"
    rules:
      # Any TCP traffic from inside the VPC.
      - proto: tcp
        from_port: 0
        to_port: 65000
        cidr_ip: 10.0.0.0/16
      # SSH from anywhere.
      - proto: tcp
        from_port: 22
        to_port: 22
        cidr_ip: 0.0.0.0/0
    rules_egress:
      # Unrestricted egress.
      - proto: all
        cidr_ip: 0.0.0.0/0
  register: sec_group
# Jupyter notebook server configuration template. The bare PORT token is
# substituted with the real port at image-build time, so this file is not
# valid Python until rendered.

from jupyter_core.paths import jupyter_data_dir
import subprocess
import os
import errno
import stat

c = get_config()  # noqa: F821 -- injected by Jupyter at startup

# Listen on every interface; access control is delegated to the container
# port mapping / surrounding network.
c.NotebookApp.ip = '*'
c.NotebookApp.port = PORT
c.NotebookApp.open_browser = False
# Allow the notebook UI to be embedded in iframes from any origin.
c.NotebookApp.tornado_settings = { 'headers': { 'Content-Security-Policy': "frame-ancestors 'self' * " } }
# NOTE(review): an empty token disables authentication entirely -- only
# acceptable behind a trusted network boundary; confirm before exposing.
c.NotebookApp.token = ''

# Generate a self-signed certificate when GEN_CERT is set in the environment.
if 'GEN_CERT' in os.environ:
    data_dir = jupyter_data_dir()
    pem_file = os.path.join(data_dir, 'notebook.pem')
    try:
        os.makedirs(data_dir)
    except OSError as exc:  # Python >2.5
        # Tolerate a pre-existing directory; re-raise anything else.
        if not (exc.errno == errno.EEXIST and os.path.isdir(data_dir)):
            raise
    # Key and certificate are written into the same PEM file.
    subprocess.check_call(['openssl', 'req', '-new',
                           '-newkey', 'rsa:2048',
                           '-days', '365',
                           '-nodes', '-x509',
                           '-subj', '/C=XX/ST=XX/L=XX/O=generated/CN=generated',
                           '-keyout', pem_file,
                           '-out', pem_file])
    # Key material: restrict the file to owner read/write.
    os.chmod(pem_file, stat.S_IRUSR | stat.S_IWUSR)
    c.NotebookApp.certfile = pem_file
# Superset container image: installs a pinned Superset build plus drivers,
# renders its configuration from Jinja templates, and bootstraps the admin
# user and metadata database at build time.
FROM python:2.7

# Install
ENV SUPERSET_VERSION 0.17.1

RUN apt-get update && apt-get install -y \
    build-essential \
    libssl-dev \
    libffi-dev \
    python-dev \
    libsasl2-dev \
    libldap2-dev \
    supervisor \
    && apt-get clean -y
RUN apt-get install -y expect
RUN apt-get install -y vim
RUN pip --no-cache-dir install https://storage.googleapis.com/verteego-vds/superset/dist/superset-0.15.0.tar.gz \
    mysqlclient \
    sqlalchemy-redshift \
    redis \
    celery \
    "celery[redis]" \
    Werkzeug \
    jinja2-cli

# Default config
# BUGFIX: PYTHONPATH entries must be directories, not .py files; the old
# value pointed at /home/superset/superset_config.py itself, so the module
# was never importable. Point at the directory containing it instead.
ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    PATH=$PATH:/home/superset/.bin \
    PYTHONPATH=/home/superset:$PYTHONPATH

# Run as superset user
WORKDIR /home/superset

# jinja2-cli with YAML support renders the config templates below.
# BUGFIX: this install was duplicated further down; install it once here.
RUN pip install jinja2-cli[yaml]

RUN groupadd -r superset && \
    useradd -r -m -g superset superset && \
    mkdir -p /home/superset/db /var/log/supervisor /var/run/supervisor && \
    chown -R superset:superset /home/superset && \
    chown -R superset:superset /var/log/supervisor && \
    chown -R superset:superset /var/run/supervisor

COPY config/vars_file.yml /tmp/vars_file.yml
COPY templates/superset/superset_config.py /tmp/superset_config.py.j2
COPY templates/create-user.sh /tmp/create-user.sh.j2
RUN jinja2 /tmp/superset_config.py.j2 /tmp/vars_file.yml --format=yml > /home/superset/superset_config.py
RUN jinja2 /tmp/create-user.sh.j2 /tmp/vars_file.yml --format=yml > /tmp/create-user.sh

USER superset
# create-user.sh is an expect script that answers fabmanager's prompts.
RUN expect /tmp/create-user.sh
RUN superset db upgrade

# Deploy
EXPOSE 8088
HEALTHCHECK CMD ["curl", "-f", "http://localhost:8088/health"]
ENTRYPOINT ["superset"]
CMD ["runserver"]
}}" 26 | 27 | - name: Add storage volume 28 | ec2_vol: 29 | aws_access_key: "{{ aws_access_key }}" 30 | aws_secret_key: "{{ aws_secret_key }}" 31 | device_name: /dev/xvdb 32 | volume_type: gp2 33 | volume_size: "{{ storage_size }}" 34 | region: "{{ region }}" 35 | instance: "{{ item.id }}" 36 | with_items: "{{ ec2.instances }}" 37 | when: (storage_size is defined) and (storage_size > 0) 38 | 39 | - name: Add tag to Instance(s) 40 | ec2_tag: 41 | aws_access_key: "{{ aws_access_key }}" 42 | aws_secret_key: "{{ aws_secret_key }}" 43 | resource : "{{ item.id }}" 44 | region : "{{ region }}" 45 | state : present 46 | with_items: "{{ ec2.instances }}" 47 | args: 48 | tags: 49 | Name: "{{ group }}" 50 | 51 | - name: Add the newly created EC2 instance(s) to the local host group (located inside the directory) 52 | lineinfile: 53 | dest="./hosts" 54 | regexp="{{ item.public_dns_name }} ansible_host={{ item.public_ip }} ansible_user=ubuntu ansible_ssh_private_key_file={{ aws_key_file }}" 55 | insertafter="{{ group }}" 56 | line="{{ item.public_dns_name }} ansible_host={{ item.public_ip }} ansible_user=ubuntu ansible_ssh_private_key_file={{ aws_key_file }}" 57 | with_items: "{{ ec2.instances }}" -------------------------------------------------------------------------------- /deployment/docker/nifi/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Dockerfile for nifi 3 | # 4 | 5 | FROM java:8-jre-alpine 6 | MAINTAINER s3ni0r 7 | 8 | ENV NIFI_VERSION 1.1.0 9 | ENV NIFI_FILE nifi-${NIFI_VERSION}-bin.tar.gz 10 | ENV NIFI_URL https://storage.googleapis.com/verteego-vds/nifi/nifi-assembly/target/nifi-1.1.0-SNAPSHOT-bin.tar.gz 11 | ENV NIFI_HOME /opt/nifi 12 | ENV NIFI_PORT 33331 13 | ENV NIFI_LOG_DIR /var/log/nifi 14 | ENV PYTHON_VERSION=2.7.12-r0 15 | ENV PY_PIP_VERSION=8.1.2-r0 16 | ENV SUPERVISOR_VERSION=3.3.1 17 | 18 | WORKDIR $NIFI_HOME 19 | 20 | 21 | RUN apk update && apk add -u python=$PYTHON_VERSION 
py-pip=$PY_PIP_VERSION 22 | RUN pip install supervisor==$SUPERVISOR_VERSION 23 | 24 | RUN set -xe \ 25 | && apk add --no-cache bash curl tar python python-dev py-pip \ 26 | && curl -sSL ${NIFI_URL} | tar xz --strip-components=1 \ 27 | && apk del curl tar \ 28 | && addgroup -g 1000 nifi \ 29 | && adduser -h ${NIFI_HOME} -s /bin/bash -G nifi -u 1000 -D -H nifi \ 30 | && mkdir -p content_repository database_repository flowfile_repository provenance_repository ${NIFI_LOG_DIR}\ 31 | && chown -R nifi:nifi ${NIFI_HOME} \ 32 | && chown -R nifi:nifi ${NIFI_LOG_DIR} 33 | 34 | RUN pip install j2cli[yaml] 35 | 36 | COPY templates/bootstrap.conf.j2 /tmp/bootstrap.conf.j2 37 | COPY templates/nifi.properties.j2 /tmp/nifi.properties.j2 38 | COPY templates/nifi-env.sh.j2 /tmp/nifi-env.sh.j2 39 | COPY templates/supervisord.conf /etc/supervisord.conf 40 | COPY config/vars_file.yml /tmp/vars_file.yml 41 | 42 | RUN j2 /tmp/bootstrap.conf.j2 /tmp/vars_file.yml > ${NIFI_HOME}/conf/bootstrap.conf 43 | RUN j2 /tmp/nifi.properties.j2 /tmp/vars_file.yml > ${NIFI_HOME}/conf/nifi.properties 44 | RUN j2 /tmp/nifi-env.sh.j2 /tmp/vars_file.yml > ${NIFI_HOME}/conf/nifi-env.sh 45 | 46 | VOLUME ${NIFI_HOME}/content_repository \ 47 | ${NIFI_HOME}/database_repository \ 48 | ${NIFI_HOME}/flowfile_repository \ 49 | ${NIFI_HOME}/provenance_repository 50 | 51 | EXPOSE ${NIFI_PORT} 52 | 53 | #USER nifi 54 | 55 | CMD ["supervisord", "--nodaemon", "--configuration", "/etc/supervisord.conf"] -------------------------------------------------------------------------------- /deployment/ansible/roles/common-server/templates/limits.j2: -------------------------------------------------------------------------------- 1 | # /etc/security/limits.conf 2 | # 3 | #Each line describes a limit for a user in the form: 4 | # 5 | # 6 | # 7 | #Where: 8 | # can be: 9 | # - an user name 10 | # - a group name, with @group syntax 11 | # - the wildcard *, for default entry 12 | # - the wildcard %, can be also used with %group 
---
# Install and configure Superset as a systemd service running from a
# dedicated virtualenv owned by the superset user.
- name: Setup working directory
  file:
    owner: superset
    group: superset
    mode: u=rwx,g=rx,o=rx
    state: directory
    path: /opt/superset

- name: Setup logging directory
  file:
    owner: superset
    group: superset
    mode: u=rwx,g=rx,o=rx
    state: directory
    path: /var/log/superset

- name: Remove any previous virtualenv
  command: /bin/rm -rf /opt/superset/venv

- name: create virtualenv
  command: virtualenv /opt/superset/venv --system-site-packages -p python2.7
  become: yes
  become_user: superset

- name: Install superset into the virtualenv
  pip:
    name: https://storage.googleapis.com/verteego-vds/superset/dist/superset-0.15.0.tar.gz
    virtualenv: /opt/superset/venv
    virtualenv_site_packages: yes
  become: yes
  become_user: superset

- name: Remove any previous superset metadata database
  file: path=/home/superset/.superset/superset.db state=absent

- name: Create the superset admin user
  expect:
    command: /opt/superset/venv/bin/fabmanager create-admin --app superset
    timeout: 120
    responses:
      (?i)username.*: "{{ superset.admin.username }}"
      (?i)first.*: "{{ superset.admin.first_name }}"
      (?i)last.*: "{{ superset.admin.last_name }}"
      (?i)email.*: "{{ superset.admin.email }}"
      (?i)password.*: "{{ superset.admin.password }}"
      (?i)repeat.*: "{{ superset.admin.password }}"
  become: yes
  become_user: superset

# BUGFIX: the two tasks below had empty `name:` values.
- name: Upgrade the superset database
  command: /opt/superset/venv/bin/superset db upgrade
  become: yes
  become_user: superset

- name: Initialize superset
  command: /opt/superset/venv/bin/superset init
  become: yes
  become_user: superset

- name: Copy superset service script
  template: src={{item.src}} dest={{item.dest}} mode='u=rwx,g=rx,o=rx'
  with_items:
    - { src: 'templates/systemd/superset.service.j2', dest: '/lib/systemd/system/superset.service'}

- name: copy superset config
  copy:
    src: ../templates/superset_config
    dest: /opt/superset/
  become: yes
  become_user: superset

- name: Add superset config to virtualenv PYTHONPATH
  template: src={{item.src}} dest={{item.dest}}
  with_items:
    - { src: 'templates/superset_custom_config.pth', dest: '/opt/superset/venv/lib/python2.7/site-packages', owner: superset, group: superset}
    - { src: 'templates/superset_config/superset_config.py.j2', dest: '/opt/superset/superset_config/superset_config.py', owner: superset, group: superset}

- name: enable superset service at startup
  systemd:
    name: superset
    enabled: yes
    masked: no

- name: Restart superset
  systemd:
    state: restarted
    daemon_reload: yes
    name: superset
"present" 39 | src_range: ['0.0.0.0/0'] 40 | 41 | - name: create standard instance 42 | gce: 43 | instance_names: "{{ ginstance_name | default('auto-vds-ansible') }}" 44 | disk_auto_delete: true 45 | disks: 46 | - name: "{{ disk_name }}" 47 | mode: READ_WRITE 48 | zone: "{{ zone }}" 49 | machine_type: "{{ machine_type }}" 50 | image: "{{ image }}" 51 | state: present 52 | service_account_email: "{{ gcloud.client_email }}" 53 | credentials_file: "{{ credentials_file }}" 54 | project_id: "{{ gcloud.project_id }}" 55 | tags: "{{ firewall_tag }}" 56 | register: gce 57 | 58 | - name: Fix .ssh/known_hosts. 59 | local_action: command ssh-keygen -f "~/.ssh/known_hosts" -R {{ item.public_ip }} 60 | with_items: "{{ gce.instance_data }}" 61 | 62 | - name: Wait for SSH to come up 63 | wait_for: host={{ item.public_ip }} port=22 delay=10 timeout=600 64 | with_items: "{{ gce.instance_data }}" 65 | 66 | - name: Add host to groupname 67 | add_host: hostname={{ item.public_ip }} groupname=new_instances 68 | with_items: "{{ gce.instance_data }}" 69 | 70 | - name: Setup newly created instance 71 | hosts: new_instances 72 | become: yes 73 | become_method : sudo 74 | roles: 75 | - common-server 76 | - java 77 | - nifi 78 | - openrefine 79 | - superset 80 | - h2o 81 | - anaconda -------------------------------------------------------------------------------- /deployment/docker/docker-stack-prod.yaml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | visualizer: 4 | image: dockersamples/visualizer 5 | ports: 6 | - "8080:8080" 7 | volumes: 8 | - "/var/run/docker.sock:/var/run/docker.sock" 9 | deploy: 10 | replicas: 1 11 | placement: 12 | constraints: 13 | - node.role == manager 14 | resources: 15 | limits: 16 | cpus: '0.1' 17 | memory: 100M 18 | reservations: 19 | cpus: '0.1' 20 | memory: 150M 21 | 22 | zookeeper: 23 | image: zookeeper 24 | deploy: 25 | replicas: 2 26 | placement: 27 | constraints: 28 | - node.role == 
manager 29 | resources: 30 | limits: 31 | cpus: '0.25' 32 | memory: 500M 33 | reservations: 34 | cpus: '0.15' 35 | memory: 250M 36 | 37 | h2o: 38 | image: vdshub/h2o 39 | ports: 40 | - "33333:33333" 41 | command: supervisord 42 | deploy: 43 | replicas: 1 44 | placement: 45 | constraints: 46 | - node.role == manager 47 | resources: 48 | limits: 49 | cpus: '0.25' 50 | memory: 500M 51 | reservations: 52 | cpus: '0.15' 53 | memory: 250M 54 | 55 | nifi: 56 | image: vdshub/nifi 57 | ports: 58 | - "33331:33331" 59 | deploy: 60 | replicas: 1 61 | placement: 62 | constraints: 63 | - node.hostname == worker2 64 | resources: 65 | limits: 66 | cpus: '0.75' 67 | memory: 2G 68 | reservations: 69 | cpus: '0.50' 70 | memory: 1G 71 | 72 | openrefine: 73 | image: vdshub/openrefine 74 | command: /opt/openrefine/start.sh 75 | ports: 76 | - "33332:33332" 77 | deploy: 78 | replicas: 1 79 | placement: 80 | constraints: 81 | - node.role == worker 82 | resources: 83 | limits: 84 | cpus: '0.4' 85 | memory: 750M 86 | reservations: 87 | cpus: '0.4' 88 | memory: 750M 89 | 90 | superset: 91 | image: vdshub/superset 92 | ports: 93 | - "33330:8088" 94 | deploy: 95 | replicas: 1 96 | placement: 97 | constraints: 98 | - node.role == worker 99 | resources: 100 | limits: 101 | cpus: '0.25' 102 | memory: 500M 103 | reservations: 104 | cpus: '0.25' 105 | memory: 500M 106 | 107 | jupyter: 108 | image: vdshub/jupyter 109 | ports: 110 | - "33335:33335" 111 | deploy: 112 | replicas: 1 113 | placement: 114 | constraints: 115 | - node.role == worker 116 | resources: 117 | limits: 118 | cpus: '0.25' 119 | memory: 500M 120 | reservations: 121 | cpus: '0.25' 122 | memory: 500M 123 | 124 | #volumes: 125 | # registry: 126 | # driver: rexray 127 | # h2o: 128 | # driver: rexray 129 | # nifi: 130 | # driver: rexray 131 | # openrefine: 132 | # driver: rexray 133 | # superset: 134 | # driver: rexray 135 | # anaconda: 136 | # driver: rexray 137 | 138 | 
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Java command used to launch NiFi.
java=java

# User NiFi runs as (ignored on Windows).
run.as=nifi

# Locations of NiFi's lib and conf directories.
lib.dir=./lib
conf.dir=./conf

# Seconds to wait after a shutdown request before killing the process.
graceful.shutdown.seconds=20

# Disable JSR 199 so JSPs work without a full JDK.
java.arg.1=-Dorg.apache.jasper.compiler.disablejsr199=true

# JVM heap: fixed minimum; maximum is rendered from nifi.java.Xmx.
java.arg.2=-Xms512m
java.arg.3=-Xmx{{ nifi.java.Xmx }}

# Remote debugging (disabled by default).
#java.arg.debug=-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=8000

java.arg.4=-Djava.net.preferIPv4Stack=true

# allowRestrictedHeaders is required for cluster/node communications.
java.arg.5=-Dsun.net.http.allowRestrictedHeaders=true
java.arg.6=-Djava.protocol.handler.pkgs=sun.net.www.protocol

# G1 garbage collector for short "stop-the-world" pauses.
java.arg.13=-XX:+UseG1GC

# Headless mode by default.
java.arg.14=-Djava.awt.headless=true

# Master key in hexadecimal format for encrypted sensitive configuration
# values (intentionally left unset).
nifi.bootstrap.sensitive.key=

###
# Notification services fired when NiFi is started, stopped, or dies.
###

# XML file that defines the available notification services.
notification.services.file=./conf/bootstrap-notification-services.xml

# Retry count for notifications that fail to send.
notification.max.attempts=5

# Comma-separated service identifiers (from the file above) to notify on
# start, stop, and death; all disabled by default.
#nifi.start.notification.services=email-notification
#nifi.stop.notification.services=email-notification
#nifi.dead.notification.services=email-notification
22 | run.as=nifi 23 | 24 | # Configure where NiFi's lib and conf directories live 25 | lib.dir=./lib 26 | conf.dir=./conf 27 | 28 | # How long to wait after telling NiFi to shutdown before explicitly killing the Process 29 | graceful.shutdown.seconds=20 30 | 31 | # Disable JSR 199 so that we can use JSP's without running a JDK 32 | java.arg.1=-Dorg.apache.jasper.compiler.disablejsr199=true 33 | 34 | # JVM memory settings 35 | java.arg.2=-Xms512m 36 | java.arg.3=-Xmx{{ nifi.java.Xmx }} 37 | 38 | # Enable Remote Debugging 39 | #java.arg.debug=-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=8000 40 | 41 | java.arg.4=-Djava.net.preferIPv4Stack=true 42 | 43 | # allowRestrictedHeaders is required for Cluster/Node communications to work properly 44 | java.arg.5=-Dsun.net.http.allowRestrictedHeaders=true 45 | java.arg.6=-Djava.protocol.handler.pkgs=sun.net.www.protocol 46 | 47 | # The G1GC is still considered experimental but has proven to be very advantageous in providing great 48 | # performance without significant "stop-the-world" delays. 49 | java.arg.13=-XX:+UseG1GC 50 | 51 | #Set headless mode by default 52 | java.arg.14=-Djava.awt.headless=true 53 | 54 | # Master key in hexadecimal format for encrypted sensitive configuration values 55 | nifi.bootstrap.sensitive.key= 56 | 57 | ### 58 | # Notification Services for notifying interested parties when NiFi is stopped, started, dies 59 | ### 60 | 61 | # XML File that contains the definitions of the notification services 62 | notification.services.file=./conf/bootstrap-notification-services.xml 63 | 64 | # In the case that we are unable to send a notification for an event, how many times should we retry? 65 | notification.max.attempts=5 66 | 67 | # Comma-separated list of identifiers that are present in the notification.services.file; which services should be used to notify when NiFi is started? 
---
# Install Anaconda3 with R, Python 2, and Bash Jupyter kernels, and run the
# notebook server as a systemd service under the anaconda user.
- name: Setup working directory
  file:
    owner: anaconda
    group: anaconda
    mode: u=rwx,g=rx,o=rx
    state: directory
    path: /opt/anaconda

- name: Setup logging directory
  file:
    owner: anaconda
    group: anaconda
    mode: u=rwx,g=rx,o=rx
    state: directory
    path: /var/log/anaconda

- name: Setup jupyter directory
  file:
    owner: anaconda
    group: anaconda
    mode: u=rwx,g=rx,o=rx
    state: directory
    path: /home/anaconda/.jupyter

- name: download anaconda installer
  get_url:
    url: "https://repo.continuum.io/archive/Anaconda3-4.2.0-Linux-x86_64.sh"
    dest: /tmp/anaconda.sh
    mode: 0755
  become: yes
  become_user: anaconda

- name: Remove any previous anaconda install
  command: /bin/rm -rf /opt/anaconda/anaconda3

- name: install anaconda
  shell: /tmp/anaconda.sh -b -p /opt/anaconda/anaconda3
  become: yes
  become_user: anaconda

- name: update pip
  shell: /opt/anaconda/anaconda3/bin/pip install -U pip
  become: yes
  become_user: anaconda

# R kernel install is slow, so it runs async and is polled below.
- name: Install R kernel
  shell: /opt/anaconda/anaconda3/bin/conda install -c r r-essentials -y
  async: 3600
  poll: 10
  register: r_install_sleeper
  become: yes
  become_user: anaconda

- name: 'r install - check on fire and forget task'
  async_status: jid={{ r_install_sleeper.ansible_job_id }}
  register: job_result
  until: job_result.finished
  retries: 60
  become: yes
  become_user: anaconda

- name: update conda to latest version
  shell: /opt/anaconda/anaconda3/bin/conda update conda -y
  become: yes
  become_user: anaconda

# BUGFIX: this task was mis-named "update conda to latest version"
# (copy-paste from the task above).
- name: install nb_conda_kernels
  shell: /opt/anaconda/anaconda3/bin/conda install -c conda-forge nb_conda_kernels -y
  become: yes
  become_user: anaconda

- name: Install Python 2 ipykernel
  shell: /opt/anaconda/anaconda3/bin/conda create -n py27 python=2.7 ipykernel -y
  become: yes
  become_user: anaconda

- name: Install Bash kernel
  shell: /opt/anaconda/anaconda3/bin/pip3.5 install bash_kernel
  become: yes
  become_user: anaconda

# BUGFIX: this task was also named "Install Bash kernel"; it registers the
# kernel with Jupyter rather than installing the package.
- name: Register Bash kernel with Jupyter
  shell: /opt/anaconda/anaconda3/bin/python3.5 -m bash_kernel.install
  become: yes
  become_user: anaconda

# BUGFIX: the mapping below contained `state: present` twice (a YAML
# duplicate key); the second occurrence is removed.
- name: Put anaconda on the PATH for the anaconda user
  lineinfile:
    dest: /home/anaconda/.bashrc
    state: present
    regexp: '^PATH='
    line: 'PATH=/opt/anaconda/anaconda3/bin:$PATH'
    owner: anaconda

# BUGFIX: group was rendered from item.owner instead of item.group (it only
# worked because owner and group happen to match in both items).
- name: Copy anaconda service script
  template: src={{item.src}} dest={{item.dest}} mode='u=rwx,g=rx,o=rx' owner={{item.owner}} group={{item.group}}
  with_items:
    - { src: 'templates/systemd/anaconda.service.j2', dest: '/lib/systemd/system/anaconda.service', owner: root, group: root}
    - { src: 'templates/jupyter_notebook_config.py.j2', dest: '/home/anaconda/.jupyter/jupyter_notebook_config.py', owner: anaconda, group: anaconda}

- name: enable anaconda service at startup
  systemd:
    name: anaconda
    enabled: yes
    masked: no

- name: Restart anaconda
  systemd:
    state: restarted
    daemon_reload: yes
    name: anaconda
- python-pip
- python-setuptools
- python-virtualenv
validate: 'visudo -cf %s' 81 | - user: name=superset state=present uid=13500 groups=superset shell=/bin/bash 82 | 83 | - group: name=nifi state=present gid=13501 84 | - name: Allow 'nifi' group to have passwordless sudo 85 | lineinfile: 86 | dest: /etc/sudoers 87 | state: present 88 | regexp: '^%nifi' 89 | line: '%nifi ALL=(ALL) NOPASSWD: ALL' 90 | validate: 'visudo -cf %s' 91 | - user: name=nifi state=present uid=13501 groups=nifi shell=/bin/bash 92 | 93 | - group: name=openrefine state=present gid=13502 94 | - name: Allow 'openrefine' group to have passwordless sudo 95 | lineinfile: 96 | dest: /etc/sudoers 97 | state: present 98 | regexp: '^%openrefine' 99 | line: '%openrefine ALL=(ALL) NOPASSWD: ALL' 100 | validate: 'visudo -cf %s' 101 | - user: name=openrefine state=present uid=13502 groups=openrefine shell=/bin/bash 102 | 103 | - group: name=h2o state=present gid=13503 104 | - name: Allow 'h2o' group to have passwordless sudo 105 | lineinfile: 106 | dest: /etc/sudoers 107 | state: present 108 | regexp: '^%h2o' 109 | line: '%h2o ALL=(ALL) NOPASSWD: ALL' 110 | validate: 'visudo -cf %s' 111 | - user: name=h2o state=present uid=13503 groups=h2o shell=/bin/bash 112 | 113 | - group: name=anaconda state=present gid=13505 114 | - name: Allow 'anaconda' group to have passwordless sudo 115 | lineinfile: 116 | dest: /etc/sudoers 117 | state: present 118 | regexp: '^%anaconda' 119 | line: '%anaconda ALL=(ALL) NOPASSWD: ALL' 120 | validate: 'visudo -cf %s' 121 | - user: name=anaconda state=present uid=13505 groups=anaconda shell=/bin/bash -------------------------------------------------------------------------------- /deployment/docker/docker-stack-dev.yaml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | # register: 4 | # image: registry:2 5 | # ports: 6 | # - "5000:5000" 7 | # deploy: 8 | # replicas: 1 9 | # placement: 10 | # constraints: 11 | # - node.role == manager 12 | # resources: 13 | # limits: 14 
resources:
  limits:
    cpus: '0.1'
    memory: 150M
  reservations:
    cpus: '0.1'
    memory: 100M
"33330:8088" 116 | deploy: 117 | replicas: 1 118 | placement: 119 | constraints: 120 | - node.role == worker 121 | resources: 122 | limits: 123 | cpus: '0.5' 124 | memory: 500M 125 | reservations: 126 | cpus: '0.5' 127 | memory: 500M 128 | 129 | jupyter: 130 | image: vdshub/jupyter 131 | ports: 132 | - "8888:8088" 133 | deploy: 134 | replicas: 1 135 | placement: 136 | constraints: 137 | - node.role == worker 138 | resources: 139 | limits: 140 | cpus: '0.5' 141 | memory: 500M 142 | reservations: 143 | cpus: '0.5' 144 | memory: 500M 145 | 146 | #volumes: 147 | # registry: 148 | # driver: rexray 149 | # h2o: 150 | # driver: rexray 151 | # nifi: 152 | # driver: rexray 153 | # openrefine: 154 | # driver: rexray 155 | # superset: 156 | # driver: rexray 157 | # anaconda: 158 | # driver: rexray 159 | 160 | -------------------------------------------------------------------------------- /deployment/ansible/setup_on_aws.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Start EC2 Instances 3 | hosts: localhost 4 | connection: local 5 | gather_facts: False 6 | vars: 7 | aws_key_file: "files/aws/vds.pem" 8 | instances_count: 1 9 | keypair: vds 10 | security_group: vds 11 | image: ami-11c57862 12 | region: eu-west-1 13 | instance_type: t2.medium 14 | group: AwsVdsInstance 15 | storage_size: 50 16 | keys_file: "files/aws/keys.json" 17 | tasks: 18 | - shell: cat "{{ keys_file }}" 19 | register: result 20 | - set_fact: awsKeys="{{ result.stdout | from_json }}" 21 | - debug: var=awsKeys.aws_access_key 22 | - debug: var=awsKeys.aws_secret_key 23 | 24 | - name: Create a VPC 25 | ec2_vpc: 26 | aws_access_key: "{{ awsKeys.aws_access_key }}" 27 | aws_secret_key: "{{ awsKeys.aws_secret_key }}" 28 | cidr_block: 10.0.0.0/16 29 | dns_hostnames: yes 30 | dns_support: yes 31 | instance_tenancy: default 32 | internet_gateway: yes 33 | region: "{{ region }}" 34 | resource_tags: { "Environment":"Development", "Name" :"vds_vpc"} 35 | 
wait: true
wait_timeout: 500
region: "{{ region }}"
keypair: "{{ keypair }}"
assign_public_ip: yes
vpc_subnet_id: "{{ vpc.subnets[0].id}}"
state: present
# Provides:          superset
# Required-Start:    $remote_fs $syslog
# Required-Stop:     $remote_fs $syslog
# Default-Start:     2 3 4 5
# Default-Stop:      0 1 6
# Short-Description: Superset service
# Description:       Init script that starts and stops the Superset
#                    web server from /etc/init.d.
11 | ### END INIT INFO 12 | 13 | # Author: El Mehdi EL BRINI 14 | # Do NOT "set -e" 15 | 16 | # PATH should only include /usr/* if it runs after the mountnfs.sh script 17 | PATH=/sbin:/usr/sbin:/bin:/usr/bin 18 | DESC="Superset Service by verteego" 19 | NAME=superset 20 | DAEMON=/opt/superset/venv/bin/superset 21 | DAEMON_ARGS="runserver -p {{ superset.host.port }} -a {{ superset.host.ip }}" 22 | PIDFILE=/var/run/$NAME.pid 23 | SCRIPTNAME=/etc/init.d/$NAME 24 | DAEMON_USER=superset 25 | 26 | # Exit if the package is not installed 27 | [ -x "$DAEMON" ] || exit 0 28 | 29 | # Read configuration variable file if it is present 30 | [ -r /etc/default/$NAME ] && . /etc/default/$NAME 31 | 32 | # Load the VERBOSE setting and other rcS variables 33 | . /lib/init/vars.sh 34 | 35 | # Define LSB log_* functions. 36 | # Depend on lsb-base (>= 3.2-14) to ensure that this file is present 37 | # and status_of_proc is working. 38 | . /lib/lsb/init-functions 39 | 40 | # 41 | # Function that starts the daemon/service 42 | # 43 | do_start() 44 | { 45 | # Return 46 | # 0 if daemon has been started 47 | # 1 if daemon was already running 48 | # 2 if daemon could not be started 49 | start-stop-daemon --start --background --chuid $DAEMON_USER -m --pidfile $PIDFILE --exec $DAEMON --test > /dev/null \ 50 | || return 1 51 | start-stop-daemon --start --background --chuid $DAEMON_USER -m --pidfile $PIDFILE --exec $DAEMON -- \ 52 | $DAEMON_ARGS \ 53 | || return 2 54 | # Add code here, if necessary, that waits for the process to be ready 55 | # to handle requests from services started subsequently which depend 56 | # on this one. As a last resort, sleep for some time. 
57 | } 58 | 59 | # 60 | # Function that stops the daemon/service 61 | # 62 | do_stop() 63 | { 64 | # Return 65 | # 0 if daemon has been stopped 66 | # 1 if daemon was already stopped 67 | # 2 if daemon could not be stopped 68 | # other if a failure occurred 69 | start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $PIDFILE --name $NAME 70 | RETVAL="$?" 71 | [ "$RETVAL" = 2 ] && return 2 72 | # Wait for children to finish too if this is a daemon that forks 73 | # and if the daemon is only ever run from this initscript. 74 | # If the above conditions are not satisfied then add some other code 75 | # that waits for the process to drop all resources that could be 76 | # needed by services started subsequently. A last resort is to 77 | # sleep for some time. 78 | start-stop-daemon --stop --quiet --oknodo --retry=0/30/KILL/5 --exec $DAEMON 79 | [ "$?" = 2 ] && return 2 80 | # Many daemons don't delete their pidfiles when they exit. 81 | rm -f $PIDFILE 82 | return "$RETVAL" 83 | } 84 | 85 | # 86 | # Function that sends a SIGHUP to the daemon/service 87 | # 88 | do_reload() { 89 | # 90 | # If the daemon can reload its configuration without 91 | # restarting (for example, when it is sent a SIGHUP), 92 | # then implement that here. 93 | # 94 | start-stop-daemon --stop --signal 1 --quiet --pidfile $PIDFILE --name $NAME 95 | return 0 96 | } 97 | 98 | case "$1" in 99 | start) 100 | [ "$VERBOSE" != no ] && log_daemon_msg "Starting $DESC" "$NAME" 101 | do_start 102 | case "$?" in 103 | 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;; 104 | 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;; 105 | esac 106 | ;; 107 | stop) 108 | [ "$VERBOSE" != no ] && log_daemon_msg "Stopping $DESC" "$NAME" 109 | do_stop 110 | case "$?" in 111 | 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;; 112 | 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;; 113 | esac 114 | ;; 115 | status) 116 | status_of_proc "$DAEMON" "$NAME" && exit 0 || exit $? 
117 | ;; 118 | #reload|force-reload) 119 | # 120 | # If do_reload() is not implemented then leave this commented out 121 | # and leave 'force-reload' as an alias for 'restart'. 122 | # 123 | #log_daemon_msg "Reloading $DESC" "$NAME" 124 | #do_reload 125 | #log_end_msg $? 126 | #;; 127 | restart|force-reload) 128 | # 129 | # If the "reload" option is implemented then remove the 130 | # 'force-reload' alias 131 | # 132 | log_daemon_msg "Restarting $DESC" "$NAME" 133 | do_stop 134 | case "$?" in 135 | 0|1) 136 | do_start 137 | case "$?" in 138 | 0) log_end_msg 0 ;; 139 | 1) log_end_msg 1 ;; # Old process is still running 140 | *) log_end_msg 1 ;; # Failed to start 141 | esac 142 | ;; 143 | *) 144 | # Failed to stop 145 | log_end_msg 1 146 | ;; 147 | esac 148 | ;; 149 | *) 150 | #echo "Usage: $SCRIPTNAME {start|stop|restart|reload|force-reload}" >&2 151 | echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2 152 | exit 3 153 | ;; 154 | esac 155 | 156 | : -------------------------------------------------------------------------------- /deployment/docker/nifi/templates/nifi.properties.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Core Properties # 17 | nifi.version=1.1.0-SNAPSHOT 18 | nifi.flow.configuration.file=./conf/flow.xml.gz 19 | nifi.flow.configuration.archive.enabled=true 20 | nifi.flow.configuration.archive.dir=./conf/archive/ 21 | nifi.flow.configuration.archive.max.time=30 days 22 | nifi.flow.configuration.archive.max.storage=500 MB 23 | nifi.flowcontroller.autoResumeState=true 24 | nifi.flowcontroller.graceful.shutdown.period=10 sec 25 | nifi.flowservice.writedelay.interval=500 ms 26 | nifi.administrative.yield.duration=30 sec 27 | # If a component has no work to do (is "bored"), how long should we wait before checking again for work? 28 | nifi.bored.yield.duration=10 millis 29 | 30 | nifi.authorizer.configuration.file=./conf/authorizers.xml 31 | nifi.login.identity.provider.configuration.file=./conf/login-identity-providers.xml 32 | nifi.templates.directory=./conf/templates 33 | nifi.ui.banner.text= 34 | nifi.ui.autorefresh.interval=30 sec 35 | nifi.nar.library.directory=./lib 36 | nifi.nar.working.directory=./work/nar/ 37 | nifi.documentation.working.directory=./work/docs/components 38 | 39 | #################### 40 | # State Management # 41 | #################### 42 | nifi.state.management.configuration.file=./conf/state-management.xml 43 | # The ID of the local state provider 44 | nifi.state.management.provider.local=local-provider 45 | # The ID of the cluster-wide state provider. This will be ignored if NiFi is not clustered but must be populated if running in a cluster. 
# Properties file that provides the ZooKeeper properties to use if <nifi.state.management.embedded.zookeeper.start> is set to true
nifi.provenance.repository.max.storage.time=24 hours 88 | nifi.provenance.repository.max.storage.size=1 GB 89 | nifi.provenance.repository.rollover.time=30 secs 90 | nifi.provenance.repository.rollover.size=100 MB 91 | nifi.provenance.repository.query.threads=2 92 | nifi.provenance.repository.index.threads=1 93 | nifi.provenance.repository.compress.on.rollover=true 94 | nifi.provenance.repository.always.sync=false 95 | nifi.provenance.repository.journal.count=16 96 | # Comma-separated list of fields. Fields that are not indexed will not be searchable. Valid fields are: 97 | # EventType, FlowFileUUID, Filename, TransitURI, ProcessorID, AlternateIdentifierURI, Relationship, Details 98 | nifi.provenance.repository.indexed.fields=EventType, FlowFileUUID, Filename, ProcessorID, Relationship 99 | # FlowFile Attributes that should be indexed and made searchable. Some examples to consider are filename, uuid, mime.type 100 | nifi.provenance.repository.indexed.attributes= 101 | # Large values for the shard size will result in more Java heap usage when searching the Provenance Repository 102 | # but should provide better performance 103 | nifi.provenance.repository.index.shard.size=500 MB 104 | # Indicates the maximum length that a FlowFile attribute can be when retrieving a Provenance Event from 105 | # the repository. If the length of any attribute exceeds this value, it will be truncated when the event is retrieved. 
# Volatile Provenance Repository Properties
The following example demonstrates normalizing 155 | # DNs from certificates and principals from Kerberos into a common identity string: 156 | # 157 | # nifi.security.identity.mapping.pattern.dn=^CN=(.*?), OU=(.*?), O=(.*?), L=(.*?), ST=(.*?), C=(.*?)$ 158 | # nifi.security.identity.mapping.value.dn=$1@$2 159 | # nifi.security.identity.mapping.pattern.kerb=^(.*?)/instance@(.*?)$ 160 | # nifi.security.identity.mapping.value.kerb=$1@$2 161 | 162 | # cluster common properties (all nodes must have same values) # 163 | nifi.cluster.protocol.heartbeat.interval=5 sec 164 | nifi.cluster.protocol.is.secure=false 165 | 166 | # cluster node properties (only configure for cluster nodes) # 167 | nifi.cluster.is.node=false 168 | nifi.cluster.node.address= 169 | nifi.cluster.node.protocol.port= 170 | nifi.cluster.node.protocol.threads=10 171 | nifi.cluster.node.event.history.size=25 172 | nifi.cluster.node.connection.timeout=5 sec 173 | nifi.cluster.node.read.timeout=5 sec 174 | nifi.cluster.firewall.file= 175 | nifi.cluster.flow.election.max.wait.time=5 mins 176 | nifi.cluster.flow.election.max.candidates= 177 | 178 | # zookeeper properties, used for cluster management # 179 | nifi.zookeeper.connect.string= 180 | nifi.zookeeper.connect.timeout=3 secs 181 | nifi.zookeeper.session.timeout=3 secs 182 | nifi.zookeeper.root.node=/nifi 183 | 184 | # kerberos # 185 | nifi.kerberos.krb5.file= 186 | 187 | # kerberos service principal # 188 | nifi.kerberos.service.principal= 189 | nifi.kerberos.service.keytab.location= 190 | 191 | # kerberos spnego principal # 192 | nifi.kerberos.spnego.principal= 193 | nifi.kerberos.spnego.keytab.location= 194 | nifi.kerberos.spnego.authentication.expiration=12 hours 195 | 196 | # external properties files for variable registry 197 | # supports a comma delimited list of file locations 198 | nifi.variable.registry.properties= -------------------------------------------------------------------------------- 
/deployment/ansible/roles/nifi/templates/nifi.properties.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Core Properties # 17 | nifi.version=1.1.0-SNAPSHOT 18 | nifi.flow.configuration.file=./conf/flow.xml.gz 19 | nifi.flow.configuration.archive.enabled=true 20 | nifi.flow.configuration.archive.dir=./conf/archive/ 21 | nifi.flow.configuration.archive.max.time=30 days 22 | nifi.flow.configuration.archive.max.storage=500 MB 23 | nifi.flowcontroller.autoResumeState=true 24 | nifi.flowcontroller.graceful.shutdown.period=10 sec 25 | nifi.flowservice.writedelay.interval=500 ms 26 | nifi.administrative.yield.duration=30 sec 27 | # If a component has no work to do (is "bored"), how long should we wait before checking again for work? 
# Properties file that provides the ZooKeeper properties to use if <nifi.state.management.embedded.zookeeper.start> is set to true
70 | 71 | # Content Repository 72 | nifi.content.repository.implementation=org.apache.nifi.controller.repository.FileSystemRepository 73 | nifi.content.claim.max.appendable.size=10 MB 74 | nifi.content.claim.max.flow.files=100 75 | nifi.content.repository.directory.default=./content_repository 76 | nifi.content.repository.archive.max.retention.period=12 hours 77 | nifi.content.repository.archive.max.usage.percentage=50% 78 | nifi.content.repository.archive.enabled=true 79 | nifi.content.repository.always.sync=false 80 | nifi.content.viewer.url=/nifi-content-viewer/ 81 | 82 | # Provenance Repository Properties 83 | nifi.provenance.repository.implementation=org.apache.nifi.provenance.PersistentProvenanceRepository 84 | 85 | # Persistent Provenance Repository Properties 86 | nifi.provenance.repository.directory.default=./provenance_repository 87 | nifi.provenance.repository.max.storage.time=24 hours 88 | nifi.provenance.repository.max.storage.size=1 GB 89 | nifi.provenance.repository.rollover.time=30 secs 90 | nifi.provenance.repository.rollover.size=100 MB 91 | nifi.provenance.repository.query.threads=2 92 | nifi.provenance.repository.index.threads=1 93 | nifi.provenance.repository.compress.on.rollover=true 94 | nifi.provenance.repository.always.sync=false 95 | nifi.provenance.repository.journal.count=16 96 | # Comma-separated list of fields. Fields that are not indexed will not be searchable. Valid fields are: 97 | # EventType, FlowFileUUID, Filename, TransitURI, ProcessorID, AlternateIdentifierURI, Relationship, Details 98 | nifi.provenance.repository.indexed.fields=EventType, FlowFileUUID, Filename, ProcessorID, Relationship 99 | # FlowFile Attributes that should be indexed and made searchable. 
# Volatile Provenance Repository Properties
145 | nifi.security.truststorePasswd= 146 | nifi.security.needClientAuth= 147 | nifi.security.user.authorizer=file-provider 148 | nifi.security.user.login.identity.provider= 149 | nifi.security.ocsp.responder.url= 150 | nifi.security.ocsp.responder.certificate= 151 | 152 | # Identity Mapping Properties # 153 | # These properties allow normalizing user identities such that identities coming from different identity providers 154 | # (certificates, LDAP, Kerberos) can be treated the same internally in NiFi. The following example demonstrates normalizing 155 | # DNs from certificates and principals from Kerberos into a common identity string: 156 | # 157 | # nifi.security.identity.mapping.pattern.dn=^CN=(.*?), OU=(.*?), O=(.*?), L=(.*?), ST=(.*?), C=(.*?)$ 158 | # nifi.security.identity.mapping.value.dn=$1@$2 159 | # nifi.security.identity.mapping.pattern.kerb=^(.*?)/instance@(.*?)$ 160 | # nifi.security.identity.mapping.value.kerb=$1@$2 161 | 162 | # cluster common properties (all nodes must have same values) # 163 | nifi.cluster.protocol.heartbeat.interval=5 sec 164 | nifi.cluster.protocol.is.secure=false 165 | 166 | # cluster node properties (only configure for cluster nodes) # 167 | nifi.cluster.is.node=false 168 | nifi.cluster.node.address= 169 | nifi.cluster.node.protocol.port= 170 | nifi.cluster.node.protocol.threads=10 171 | nifi.cluster.node.event.history.size=25 172 | nifi.cluster.node.connection.timeout=5 sec 173 | nifi.cluster.node.read.timeout=5 sec 174 | nifi.cluster.firewall.file= 175 | nifi.cluster.flow.election.max.wait.time=5 mins 176 | nifi.cluster.flow.election.max.candidates= 177 | 178 | # zookeeper properties, used for cluster management # 179 | nifi.zookeeper.connect.string= 180 | nifi.zookeeper.connect.timeout=3 secs 181 | nifi.zookeeper.session.timeout=3 secs 182 | nifi.zookeeper.root.node=/nifi 183 | 184 | # kerberos # 185 | nifi.kerberos.krb5.file= 186 | 187 | # kerberos service principal # 188 | nifi.kerberos.service.principal= 189 | 
nifi.kerberos.service.keytab.location= 190 | 191 | # kerberos spnego principal # 192 | nifi.kerberos.spnego.principal= 193 | nifi.kerberos.spnego.keytab.location= 194 | nifi.kerberos.spnego.authentication.expiration=12 hours 195 | 196 | # external properties files for variable registry 197 | # supports a comma delimited list of file locations 198 | nifi.variable.registry.properties= -------------------------------------------------------------------------------- /deployment/ansible/roles/anaconda/tasks/templates/jupyter_notebook_config.py.j2: -------------------------------------------------------------------------------- 1 | # Configuration file for jupyter-notebook. 2 | 3 | #------------------------------------------------------------------------------ 4 | # Application(SingletonConfigurable) configuration 5 | #------------------------------------------------------------------------------ 6 | 7 | ## This is an application. 8 | 9 | ## The date format used by logging formatters for %(asctime)s 10 | #c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S' 11 | 12 | ## The Logging format template 13 | #c.Application.log_format = '[%(name)s]%(highlevel)s %(message)s' 14 | 15 | ## Set the log level by value or name. 16 | #c.Application.log_level = 30 17 | 18 | #------------------------------------------------------------------------------ 19 | # JupyterApp(Application) configuration 20 | #------------------------------------------------------------------------------ 21 | 22 | ## Base class for Jupyter applications 23 | 24 | ## Answer yes to any prompts. 25 | #c.JupyterApp.answer_yes = False 26 | 27 | ## Full path of a config file. 28 | #c.JupyterApp.config_file = u'' 29 | 30 | ## Specify a config file to load. 31 | #c.JupyterApp.config_file_name = u'' 32 | 33 | ## Generate default config file. 
34 | #c.JupyterApp.generate_config = False 35 | 36 | #------------------------------------------------------------------------------ 37 | # NotebookApp(JupyterApp) configuration 38 | #------------------------------------------------------------------------------ 39 | 40 | ## Set the Access-Control-Allow-Credentials: true header 41 | #c.NotebookApp.allow_credentials = True 42 | 43 | ## Set the Access-Control-Allow-Origin header 44 | # 45 | # Use '*' to allow any origin to access your server. 46 | # 47 | # Takes precedence over allow_origin_pat. 48 | #c.NotebookApp.allow_origin = "*" 49 | 50 | ## Use a regular expression for the Access-Control-Allow-Origin header 51 | # 52 | # Requests from an origin matching the expression will get replies with: 53 | # 54 | # Access-Control-Allow-Origin: origin 55 | # 56 | # where `origin` is the origin of the request. 57 | # 58 | # Ignored if allow_origin is set. 59 | #c.NotebookApp.allow_origin_pat = '' 60 | 61 | ## DEPRECATED use base_url 62 | #c.NotebookApp.base_project_url = '/' 63 | 64 | ## The base URL for the notebook server. 65 | # 66 | # Leading and trailing slashes can be omitted, and will automatically be added. 67 | c.NotebookApp.base_url = '/' 68 | 69 | ## Specify what command to use to invoke a web browser when opening the notebook. 70 | # If not specified, the default browser will be determined by the `webbrowser` 71 | # standard library module, which allows setting of the BROWSER environment 72 | # variable to override it. 73 | #c.NotebookApp.browser = u'' 74 | 75 | ## The full path to an SSL/TLS certificate file. 76 | #c.NotebookApp.certfile = u'' 77 | 78 | ## The full path to a certificate authority certificate for SSL/TLS client 79 | # authentication. 80 | #c.NotebookApp.client_ca = u'' 81 | 82 | ## The config manager class to use 83 | #c.NotebookApp.config_manager_class = 'notebook.services.config.manager.ConfigManager' 84 | 85 | ## The notebook manager class to use. 
86 | #c.NotebookApp.contents_manager_class = 'notebook.services.contents.filemanager.FileContentsManager' 87 | 88 | ## Extra keyword arguments to pass to `set_secure_cookie`. See tornado's 89 | # set_secure_cookie docs for details. 90 | #c.NotebookApp.cookie_options = {} 91 | 92 | ## The random bytes used to secure cookies. By default this is a new random 93 | # number every time you start the Notebook. Set it to a value in a config file 94 | # to enable logins to persist across server sessions. 95 | # 96 | # Note: Cookie secrets should be kept private, do not share config files with 97 | # cookie_secret stored in plaintext (you can read the value from a file). 98 | #c.NotebookApp.cookie_secret = '' 99 | 100 | ## The file where the cookie secret is stored. 101 | #c.NotebookApp.cookie_secret_file = u'' 102 | 103 | ## The default URL to redirect to from `/` 104 | #c.NotebookApp.default_url = '/tree' 105 | 106 | ## Whether to enable MathJax for typesetting math/TeX 107 | # 108 | # MathJax is the javascript library Jupyter uses to render math/LaTeX. It is 109 | # very large, so you may want to disable it if you have a slow internet 110 | # connection, or for offline use of the notebook. 111 | # 112 | # When disabled, equations etc. will appear as their untransformed TeX source. 113 | #c.NotebookApp.enable_mathjax = True 114 | 115 | ## extra paths to look for Javascript notebook extensions 116 | #c.NotebookApp.extra_nbextensions_path = [] 117 | 118 | ## Extra paths to search for serving static files. 119 | # 120 | # This allows adding javascript/css to be available from the notebook server 121 | # machine, or overriding individual files in the IPython 122 | #c.NotebookApp.extra_static_paths = [] 123 | 124 | ## Extra paths to search for serving jinja templates. 125 | # 126 | # Can be used to override templates from notebook.templates. 
127 | #c.NotebookApp.extra_template_paths = [] 128 | 129 | ## 130 | #c.NotebookApp.file_to_run = '' 131 | 132 | ## Use minified JS file or not, mainly use during dev to avoid JS recompilation 133 | #c.NotebookApp.ignore_minified_js = False 134 | 135 | ## (bytes/sec) Maximum rate at which messages can be sent on iopub before they 136 | # are limited. 137 | #c.NotebookApp.iopub_data_rate_limit = 0 138 | 139 | ## (msg/sec) Maximum rate at which messages can be sent on iopub before they are 140 | # limited. 141 | #c.NotebookApp.iopub_msg_rate_limit = 0 142 | 143 | ## The IP address the notebook server will listen on. 144 | c.NotebookApp.ip = '{{ anaconda.host.ip }}' 145 | 146 | ## Supply extra arguments that will be passed to Jinja environment. 147 | #c.NotebookApp.jinja_environment_options = {} 148 | 149 | ## Extra variables to supply to jinja templates when rendering. 150 | #c.NotebookApp.jinja_template_vars = {} 151 | 152 | ## The kernel manager class to use. 153 | #c.NotebookApp.kernel_manager_class = 'notebook.services.kernels.kernelmanager.MappingKernelManager' 154 | 155 | ## The kernel spec manager class to use. Should be a subclass of 156 | # `jupyter_client.kernelspec.KernelSpecManager`. 157 | # 158 | # The Api of KernelSpecManager is provisional and might change without warning 159 | # between this version of Jupyter and the next stable one. 160 | #c.NotebookApp.kernel_spec_manager_class = 'jupyter_client.kernelspec.KernelSpecManager' 161 | 162 | ## The full path to a private key file for usage with SSL/TLS. 163 | #c.NotebookApp.keyfile = u'' 164 | 165 | ## The login handler class to use. 166 | #c.NotebookApp.login_handler_class = 'notebook.auth.login.LoginHandler' 167 | 168 | ## The logout handler class to use. 169 | #c.NotebookApp.logout_handler_class = 'notebook.auth.logout.LogoutHandler' 170 | 171 | ## The url for MathJax.js. 
172 | #c.NotebookApp.mathjax_url = '' 173 | 174 | ## Dict of Python modules to load as notebook server extensions. Entry values can 175 | # be used to enable and disable the loading of the extensions. 176 | #c.NotebookApp.nbserver_extensions = {} 177 | 178 | ## The directory to use for notebooks and kernels. 179 | c.NotebookApp.notebook_dir = u'/home/anaconda/' 180 | 181 | ## Whether to open in a browser after starting. The specific browser used is 182 | # platform dependent and determined by the python standard library `webbrowser` 183 | # module, unless it is overridden using the --browser (NotebookApp.browser) 184 | # configuration option. 185 | c.NotebookApp.open_browser = False 186 | 187 | ## Hashed password to use for web authentication. 188 | # 189 | # To generate, type in a python/IPython shell: 190 | # 191 | # from notebook.auth import passwd; passwd() 192 | # 193 | # The string should be of the form type:salt:hashed-password. 194 | #c.NotebookApp.password = u'' 195 | 196 | ## The port the notebook server will listen on. 197 | c.NotebookApp.port = {{ anaconda.host.port }} 198 | 199 | ## The number of additional ports to try if the specified port is not available. 200 | #c.NotebookApp.port_retries = 50 201 | 202 | ## DISABLED: use %pylab or %matplotlib in the notebook to enable matplotlib. 203 | #c.NotebookApp.pylab = 'disabled' 204 | 205 | ## (sec) Time window used to check the message and data rate limits. 206 | #c.NotebookApp.rate_limit_window = 1.0 207 | 208 | ## Reraise exceptions encountered loading server extensions? 209 | #c.NotebookApp.reraise_server_extension_failures = False 210 | 211 | ## DEPRECATED use the nbserver_extensions dict instead 212 | #c.NotebookApp.server_extensions = [] 213 | 214 | ## The session manager class to use. 215 | #c.NotebookApp.session_manager_class = 'notebook.services.sessions.sessionmanager.SessionManager' 216 | 217 | ## Supply SSL options for the tornado HTTPServer. See the tornado docs for 218 | # details. 
219 | #c.NotebookApp.ssl_options = {} 220 | 221 | ## Supply overrides for the tornado.web.Application that the Jupyter notebook 222 | # uses. 223 | c.NotebookApp.tornado_settings = { 'headers': { 'Content-Security-Policy': "frame-ancestors 'self' * " } } 224 | 225 | ## Whether to trust or not X-Scheme/X-Forwarded-Proto and X-Real-Ip/X-Forwarded- 226 | # For headers sent by the upstream reverse proxy. Necessary if the proxy handles 227 | # SSL 228 | c.NotebookApp.trust_xheaders = True 229 | 230 | ## DEPRECATED, use tornado_settings 231 | #c.NotebookApp.webapp_settings = {} 232 | 233 | ## The base URL for websockets, if it differs from the HTTP server (hint: it 234 | # almost certainly doesn't). 235 | # 236 | # Should be in the form of an HTTP origin: ws[s]://hostname[:port] 237 | #c.NotebookApp.websocket_url = 'http://104.199.30.70:8088' 238 | 239 | #------------------------------------------------------------------------------ 240 | # ConnectionFileMixin(LoggingConfigurable) configuration 241 | #------------------------------------------------------------------------------ 242 | 243 | ## Mixin for configurable classes that work with connection files 244 | 245 | ## JSON file in which to store connection info [default: kernel-.json] 246 | # 247 | # This file will contain the IP, ports, and authentication key needed to connect 248 | # clients to this kernel. By default, this file will be created in the security 249 | # dir of the current profile, but can be specified by absolute path. 250 | #c.ConnectionFileMixin.connection_file = '' 251 | 252 | ## set the control (ROUTER) port [default: random] 253 | #c.ConnectionFileMixin.control_port = 0 254 | 255 | ## set the heartbeat port [default: random] 256 | #c.ConnectionFileMixin.hb_port = 0 257 | 258 | ## set the iopub (PUB) port [default: random] 259 | #c.ConnectionFileMixin.iopub_port = 0 260 | 261 | ## Set the kernel's IP address [default localhost]. 
If the IP address is 262 | # something other than localhost, then Consoles on other machines will be able 263 | # to connect to the Kernel, so be careful! 264 | #c.ConnectionFileMixin.ip = u'' 265 | 266 | ## set the shell (ROUTER) port [default: random] 267 | #c.ConnectionFileMixin.shell_port = 0 268 | 269 | ## set the stdin (ROUTER) port [default: random] 270 | #c.ConnectionFileMixin.stdin_port = 0 271 | 272 | ## 273 | #c.ConnectionFileMixin.transport = 'tcp' 274 | 275 | #------------------------------------------------------------------------------ 276 | # KernelManager(ConnectionFileMixin) configuration 277 | #------------------------------------------------------------------------------ 278 | 279 | ## Manages a single kernel in a subprocess on this host. 280 | # 281 | # This version starts kernels with Popen. 282 | 283 | ## Should we autorestart the kernel if it dies. 284 | #c.KernelManager.autorestart = True 285 | 286 | ## DEPRECATED: Use kernel_name instead. 287 | # 288 | # The Popen Command to launch the kernel. Override this if you have a custom 289 | # kernel. If kernel_cmd is specified in a configuration file, Jupyter does not 290 | # pass any arguments to the kernel, because it cannot make any assumptions about 291 | # the arguments that the kernel understands. In particular, this means that the 292 | # kernel does not receive the option --debug if it given on the Jupyter command 293 | # line. 294 | #c.KernelManager.kernel_cmd = [] 295 | 296 | #------------------------------------------------------------------------------ 297 | # Session(Configurable) configuration 298 | #------------------------------------------------------------------------------ 299 | 300 | ## Object for handling serialization and sending of messages. 301 | # 302 | # The Session object handles building messages and sending them with ZMQ sockets 303 | # or ZMQStream objects. 
Objects can communicate with each other over the 304 | # network via Session objects, and only need to work with the dict-based IPython 305 | # message spec. The Session will handle serialization/deserialization, security, 306 | # and metadata. 307 | # 308 | # Sessions support configurable serialization via packer/unpacker traits, and 309 | # signing with HMAC digests via the key/keyfile traits. 310 | # 311 | # Parameters ---------- 312 | # 313 | # debug : bool 314 | # whether to trigger extra debugging statements 315 | # packer/unpacker : str : 'json', 'pickle' or import_string 316 | # importstrings for methods to serialize message parts. If just 317 | # 'json' or 'pickle', predefined JSON and pickle packers will be used. 318 | # Otherwise, the entire importstring must be used. 319 | # 320 | # The functions must accept at least valid JSON input, and output *bytes*. 321 | # 322 | # For example, to use msgpack: 323 | # packer = 'msgpack.packb', unpacker='msgpack.unpackb' 324 | # pack/unpack : callables 325 | # You can also set the pack/unpack callables for serialization directly. 326 | # session : bytes 327 | # the ID of this Session object. The default is to generate a new UUID. 328 | # username : unicode 329 | # username added to message headers. The default is to ask the OS. 330 | # key : bytes 331 | # The key used to initialize an HMAC signature. If unset, messages 332 | # will not be signed or checked. 333 | # keyfile : filepath 334 | # The file containing a key. If this is set, `key` will be initialized 335 | # to the contents of the file. 336 | 337 | ## Threshold (in bytes) beyond which an object's buffer should be extracted to 338 | # avoid pickling. 339 | #c.Session.buffer_threshold = 1024 340 | 341 | ## Whether to check PID to protect against calls after fork. 342 | # 343 | # This check can be disabled if fork-safety is handled elsewhere. 
344 | #c.Session.check_pid = True 345 | 346 | ## Threshold (in bytes) beyond which a buffer should be sent without copying. 347 | #c.Session.copy_threshold = 65536 348 | 349 | ## Debug output in the Session 350 | #c.Session.debug = False 351 | 352 | ## The maximum number of digests to remember. 353 | # 354 | # The digest history will be culled when it exceeds this value. 355 | #c.Session.digest_history_size = 65536 356 | 357 | ## The maximum number of items for a container to be introspected for custom 358 | # serialization. Containers larger than this are pickled outright. 359 | #c.Session.item_threshold = 64 360 | 361 | ## execution key, for signing messages. 362 | #c.Session.key = '' 363 | 364 | ## path to file containing execution key. 365 | #c.Session.keyfile = '' 366 | 367 | ## Metadata dictionary, which serves as the default top-level metadata dict for 368 | # each message. 369 | #c.Session.metadata = {} 370 | 371 | ## The name of the packer for serializing messages. Should be one of 'json', 372 | # 'pickle', or an import name for a custom callable serializer. 373 | #c.Session.packer = 'json' 374 | 375 | ## The UUID identifying this session. 376 | #c.Session.session = u'' 377 | 378 | ## The digest scheme used to construct the message signatures. Must have the form 379 | # 'hmac-HASH'. 380 | #c.Session.signature_scheme = 'hmac-sha256' 381 | 382 | ## The name of the unpacker for unserializing messages. Only used with custom 383 | # functions for `packer`. 384 | #c.Session.unpacker = 'json' 385 | 386 | ## Username for the Session. Default is your system username. 387 | #c.Session.username = u'schiessl' 388 | 389 | #------------------------------------------------------------------------------ 390 | # MultiKernelManager(LoggingConfigurable) configuration 391 | #------------------------------------------------------------------------------ 392 | 393 | ## A class for managing multiple kernels. 
394 | 395 | ## The name of the default kernel to start 396 | #c.MultiKernelManager.default_kernel_name = 'python2' 397 | 398 | ## The kernel manager class. This is configurable to allow subclassing of the 399 | # KernelManager for customized behavior. 400 | #c.MultiKernelManager.kernel_manager_class = 'jupyter_client.ioloop.IOLoopKernelManager' 401 | 402 | #------------------------------------------------------------------------------ 403 | # MappingKernelManager(MultiKernelManager) configuration 404 | #------------------------------------------------------------------------------ 405 | 406 | ## A KernelManager that handles notebook mapping and HTTP error handling 407 | 408 | ## 409 | #c.MappingKernelManager.root_dir = u'' 410 | 411 | #------------------------------------------------------------------------------ 412 | # ContentsManager(LoggingConfigurable) configuration 413 | #------------------------------------------------------------------------------ 414 | 415 | ## Base class for serving files and directories. 416 | # 417 | # This serves any text or binary file, as well as directories, with special 418 | # handling for JSON notebook documents. 419 | # 420 | # Most APIs take a path argument, which is always an API-style unicode path, and 421 | # always refers to a directory. 422 | # 423 | # - unicode, not url-escaped 424 | # - '/'-separated 425 | # - leading and trailing '/' will be stripped 426 | # - if unspecified, path defaults to '', 427 | # indicating the root path. 428 | 429 | ## 430 | #c.ContentsManager.checkpoints = None 431 | 432 | ## 433 | #c.ContentsManager.checkpoints_class = 'notebook.services.contents.checkpoints.Checkpoints' 434 | 435 | ## 436 | #c.ContentsManager.checkpoints_kwargs = {} 437 | 438 | ## Glob patterns to hide in file and directory listings. 
439 | c.ContentsManager.hide_globs = [u'__pycache__', 'venv', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dylib', '*~'] 440 | 441 | ## Python callable or importstring thereof 442 | # 443 | # To be called on a contents model prior to save. 444 | # 445 | # This can be used to process the structure, such as removing notebook outputs 446 | # or other side effects that should not be saved. 447 | # 448 | # It will be called as (all arguments passed by keyword):: 449 | # 450 | # hook(path=path, model=model, contents_manager=self) 451 | # 452 | # - model: the model to be saved. Includes file contents. 453 | # Modifying this dict will affect the file that is stored. 454 | # - path: the API path of the save destination 455 | # - contents_manager: this ContentsManager instance 456 | #c.ContentsManager.pre_save_hook = None 457 | 458 | ## The base name used when creating untitled directories. 459 | #c.ContentsManager.untitled_directory = 'Untitled Folder' 460 | 461 | ## The base name used when creating untitled files. 462 | #c.ContentsManager.untitled_file = 'untitled' 463 | 464 | ## The base name used when creating untitled notebooks. 465 | #c.ContentsManager.untitled_notebook = 'Untitled' 466 | 467 | #------------------------------------------------------------------------------ 468 | # FileManagerMixin(Configurable) configuration 469 | #------------------------------------------------------------------------------ 470 | 471 | ## Mixin for ContentsAPI classes that interact with the filesystem. 472 | # 473 | # Provides facilities for reading, writing, and copying both notebooks and 474 | # generic files. 475 | # 476 | # Shared by FileContentsManager and FileCheckpoints. 477 | # 478 | # Note ---- Classes using this mixin must provide the following attributes: 479 | # 480 | # root_dir : unicode 481 | # A directory against which API-style paths are to be resolved. 
482 | # 483 | # log : logging.Logger 484 | 485 | ## By default notebooks are saved on disk on a temporary file and then if 486 | # successfully written, it replaces the old ones. This procedure, namely 487 | # 'atomic_writing', causes some bugs on file system without operation order 488 | # enforcement (like some networked fs). If set to False, the new notebook is 489 | # written directly on the old one which could fail (eg: full filesystem or quota 490 | # ) 491 | #c.FileManagerMixin.use_atomic_writing = True 492 | 493 | #------------------------------------------------------------------------------ 494 | # FileContentsManager(FileManagerMixin,ContentsManager) configuration 495 | #------------------------------------------------------------------------------ 496 | 497 | ## Python callable or importstring thereof 498 | # 499 | # to be called on the path of a file just saved. 500 | # 501 | # This can be used to process the file on disk, such as converting the notebook 502 | # to a script or HTML via nbconvert. 503 | # 504 | # It will be called as (all arguments passed by keyword):: 505 | # 506 | # hook(os_path=os_path, model=model, contents_manager=instance) 507 | # 508 | # - path: the filesystem path to the file just written - model: the model 509 | # representing the file - contents_manager: this ContentsManager instance 510 | #c.FileContentsManager.post_save_hook = None 511 | 512 | ## 513 | #c.FileContentsManager.root_dir = u'' 514 | 515 | ## DEPRECATED, use post_save_hook. Will be removed in Notebook 5.0 516 | #c.FileContentsManager.save_script = False 517 | 518 | #------------------------------------------------------------------------------ 519 | # NotebookNotary(LoggingConfigurable) configuration 520 | #------------------------------------------------------------------------------ 521 | 522 | ## A class for computing and verifying notebook signatures. 523 | 524 | ## The hashing algorithm used to sign notebooks. 
525 | #c.NotebookNotary.algorithm = 'sha256' 526 | 527 | ## The number of notebook signatures to cache. When the number of signatures 528 | # exceeds this value, the oldest 25% of signatures will be culled. 529 | #c.NotebookNotary.cache_size = 65535 530 | 531 | ## The sqlite file in which to store notebook signatures. By default, this will 532 | # be in your Jupyter data directory. You can set it to ':memory:' to disable 533 | # sqlite writing to the filesystem. 534 | #c.NotebookNotary.db_file = u'' 535 | 536 | ## The secret key with which notebooks are signed. 537 | #c.NotebookNotary.secret = '' 538 | 539 | ## The file where the secret key is stored. 540 | #c.NotebookNotary.secret_file = u'' 541 | 542 | #------------------------------------------------------------------------------ 543 | # KernelSpecManager(LoggingConfigurable) configuration 544 | #------------------------------------------------------------------------------ 545 | 546 | ## If there is no Python kernelspec registered and the IPython kernel is 547 | # available, ensure it is added to the spec list. 548 | #c.KernelSpecManager.ensure_native_kernel = True 549 | 550 | ## The kernel spec class. This is configurable to allow subclassing of the 551 | # KernelSpecManager for customized behavior. 552 | #c.KernelSpecManager.kernel_spec_class = 'jupyter_client.kernelspec.KernelSpec' 553 | 554 | ## Whitelist of allowed kernel names. 555 | # 556 | # By default, all installed kernels are allowed. 557 | #c.KernelSpecManager.whitelist = set([]) --------------------------------------------------------------------------------