├── .gitignore ├── experimental ├── kafka │ ├── heap.yml │ ├── reload.yml │ ├── config_changes.yml │ ├── kafka.service.j2 │ ├── service_play.yml │ ├── configure_w_service.yml │ └── kafka.service.example.j2 ├── ansible-playbook_example.sh ├── run_ansible_container.sh ├── plays │ ├── ec2_terminate.yml │ ├── testplay.yml │ └── ec2_create.yml ├── ansible-example.cfg ├── testplay.yml ├── Dockerfile └── example-aws.inv ├── roles ├── vw │ ├── tasks │ │ ├── apt.yml │ │ ├── test.yml │ │ ├── main.yml │ │ └── install.yml │ └── vars │ │ └── main.yml ├── kafka │ ├── meta │ │ └── main.yml │ ├── handlers │ │ ├── main.yml │ │ └── service_module_example.yml │ ├── tasks │ │ ├── info.yml │ │ ├── uninstall.yml │ │ ├── configure.yml │ │ ├── stop.yml │ │ ├── start.yml │ │ ├── service.yml │ │ ├── install.yml │ │ └── main.yml │ ├── vars │ │ └── main.yml │ └── templates │ │ ├── kafka-server-start.sh.j2 │ │ └── server.properties.j2 ├── ec2 │ ├── vars │ │ └── main.yml │ └── tasks │ │ ├── start.yml │ │ ├── stop.yml │ │ ├── terminate.yml │ │ ├── launch.yml │ │ ├── main.yml │ │ └── describe.yml └── zookeeper │ ├── templates │ ├── myid.j2 │ ├── zoo_sample.cfg.j2 │ └── log4j.properties.j2 │ ├── tasks │ ├── info.yml │ ├── stop.yml │ ├── start.yml │ ├── uninstall.yml │ ├── configure.yml │ ├── service.yml │ ├── install.yml │ └── main.yml │ ├── handlers │ └── main.yml │ └── vars │ └── main.yml ├── ec2.yml ├── ec2_vars_vw_ajh.yml ├── example_ec2_vars.yml ├── inventory ├── example.inv └── aws.inv ├── ansible_example.cfg ├── vw.yml ├── zookeeper.yml ├── conf └── Dockerfile ├── kafka.yml ├── ec2.ini ├── README.md ├── LICENSE └── ec2.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.retry 2 | -------------------------------------------------------------------------------- /experimental/kafka/heap.yml: -------------------------------------------------------------------------------- 1 | # kafka_heap_opts: "-Xmx1G -Xms1G" 2 | -------------------------------------------------------------------------------- /roles/vw/tasks/apt.yml: -------------------------------------------------------------------------------- 1 | - name: Testing apt 2 | apt: "name=make state=installed" 3 | -------------------------------------------------------------------------------- /experimental/ansible-playbook_example.sh: -------------------------------------------------------------------------------- 1 | ansible-playbook -i inventory/aws.inv plays/kafka.yml -------------------------------------------------------------------------------- /roles/kafka/meta/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | dependencies: 3 | - { role: zookeeper, tags: ['zookeeper'] } 4 | -------------------------------------------------------------------------------- /experimental/run_ansible_container.sh: -------------------------------------------------------------------------------- 1 | docker run -it --name ansible-container -v ~/.ssh:/root/.ssh ubuntu -------------------------------------------------------------------------------- /roles/vw/vars/main.yml: -------------------------------------------------------------------------------- 1 | BUILD_DIR: /home/ubuntu/vw-git 2 | INSTALL_DIR: /usr/local 3 | VW_HOME: /usr/local/vw -------------------------------------------------------------------------------- /experimental/kafka/reload.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Reload Kafka 3 | service: 4 | name: 
kafka 5 | state: reloaded -------------------------------------------------------------------------------- /roles/ec2/vars/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ami_id: ami-62e01e02 3 | cluster_name: "tag_Name_{{ tag_key_vals.Name | replace('-', '_') }}" 4 | -------------------------------------------------------------------------------- /roles/vw/tasks/test.yml: -------------------------------------------------------------------------------- 1 | - name: Testing 2 | file: 3 | path: "/home/ubuntu/bort" 4 | owner: ubuntu 5 | state: touch 6 | -------------------------------------------------------------------------------- /roles/zookeeper/templates/myid.j2: -------------------------------------------------------------------------------- 1 | {% for host in groups['zookeeper'] %}{% if hostvars[host]['ansible_nodename'] == ansible_hostname %}{{loop.index}}{% endif %}{% endfor %} 2 | -------------------------------------------------------------------------------- /ec2.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Execute EC2 roles 3 | hosts: localhost 4 | connection: local 5 | gather_facts: False 6 | vars_files: 7 | - "{{ vars_file }}" 8 | roles: 9 | - role: ec2 10 | -------------------------------------------------------------------------------- /roles/kafka/handlers/main.yml: -------------------------------------------------------------------------------- 1 | - name: kafka_service_status report 2 | include: service.yml stage='after start/stop action' 3 | # 4 | #- name: kafka_stop_status report 5 | # include: service.yml stage='after stop action' 6 | -------------------------------------------------------------------------------- /ec2_vars_vw_ajh.yml: -------------------------------------------------------------------------------- 1 | --- 2 | key_pair: adam-heller 3 | instance_type: t2.micro 4 | region: us-west-2 5 | security_group_id: sg-2b6cf950 6 | num_instances: 2 7 | subnet_id: subnet-d7db7ab0 8 | tag_key_vals: 9 | Name: aj-ec2-test 10 | class: vw 11 | -------------------------------------------------------------------------------- /roles/kafka/tasks/info.yml: -------------------------------------------------------------------------------- 1 | - name: Kafka exists status report 2 | debug: 3 | msg: Kafka is {% if kafka.stat.exists == False %}not {% endif %}installed 4 | 5 | - name: Call service task 6 | include: service.yml stage='info' 7 | when: kafka.stat.exists 8 | -------------------------------------------------------------------------------- /roles/zookeeper/tasks/info.yml: -------------------------------------------------------------------------------- 1 | - name: Zookeeper exists status report 2 | debug: 3 | msg: Zookeeper is {% if zoo.stat.exists == False %}not {% endif %}installed 4 | 5 | - name: Call service task 6 | include: service.yml stage='info' 7 | when: zoo.stat.exists 8 | -------------------------------------------------------------------------------- /roles/kafka/tasks/uninstall.yml: -------------------------------------------------------------------------------- 1 | - name: Delete kafka 2 | file: 3 | path: "{{ KAFKA_HOME }}" 4 | state: absent 5 | owner: ubuntu 6 | 7 | - name: Delete kafka log dir 8 | file: 9 | path: "{{ KAFKA_LOG_DIR }}" 10 | state: absent 11 | owner: ubuntu 12 | -------------------------------------------------------------------------------- /example_ec2_vars.yml: 
-------------------------------------------------------------------------------- 1 | --- 2 | key_pair: ronak-nathani 3 | instance_type: t2.medium 4 | region: us-west-2 5 | security_group_id: sg-19127861 6 | num_instances: 2 7 | subnet_id: subnet-3a78835f 8 | tag_key_vals: 9 | Name: ronak-ec2-role 10 | class: kafka 11 | class2: zookeeper 12 | -------------------------------------------------------------------------------- /inventory/example.inv: -------------------------------------------------------------------------------- 1 | [zookeeper] 2 | aws1 ansible_host=35.35.35.37 3 | aws2 ansible_host=35.35.35.36 4 | aws3 ansible_host=35.35.35.35 5 | 6 | [kafka] 7 | aws1 8 | aws2 9 | aws3 10 | 11 | [all:vars] 12 | ansible_ssh_private_key_file=/Users/ronak/.ssh/my-pem-key.pem 13 | ansible_user=ubuntu 14 | -------------------------------------------------------------------------------- /roles/zookeeper/tasks/stop.yml: -------------------------------------------------------------------------------- 1 | - name: Check if zookeeper is running or not 2 | include: service.yml stage='before stop action' 3 | 4 | - name: Stop zookeeper service 5 | shell: "{{ ZOOKEEPER_STOP_COMMAND }}" 6 | when: zoo_service_status.rc == 0 7 | notify: 8 | - zoo_service_status report 9 | -------------------------------------------------------------------------------- /inventory/aws.inv: -------------------------------------------------------------------------------- 1 | [zookeeper] 2 | aws1 ansible_host=35.166.162.251 3 | aws2 ansible_host=35.164.128.212 4 | aws3 ansible_host=52.34.46.246 5 | 6 | [kafka] 7 | aws1 8 | aws2 9 | aws3 10 | 11 | [all:vars] 12 | ansible_ssh_private_key_file=/Users/ronak/.ssh/ronak-nathani.pem 13 | ansible_user=ubuntu 14 | -------------------------------------------------------------------------------- /roles/zookeeper/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: zoo_service_status report 3 | include: service.yml stage='after start/stop action' 4 | 5 | # this seems to be a bug in ansible 2.2.1 - handlers after the first one are not recognized 6 | # - name: zoo_start_status report 7 | # include: service.yml stage='after start action' -------------------------------------------------------------------------------- /roles/zookeeper/tasks/start.yml: -------------------------------------------------------------------------------- 1 | - name: Check if zookeeper is running or not 2 | include: service.yml stage='before start action' 3 | 4 | - name: Start zookeeper service 5 | shell: "{{ ZOOKEEPER_START_COMMAND }}" 6 | ignore_errors: yes 7 | when: zoo_service_status.rc != 0 8 | notify: 9 | - zoo_service_status report 10 | -------------------------------------------------------------------------------- /roles/kafka/tasks/configure.yml: -------------------------------------------------------------------------------- 1 | - name: Configure server.properties 2 | template: 3 | src: server.properties.j2 4 | dest: "{{ KAFKA_HOME }}/config/server.properties" 5 | 6 | - name: Configure kafka-server-start.sh 7 | template: 8 | src: kafka-server-start.sh.j2 9 | dest: "{{ KAFKA_HOME }}/bin/kafka-server-start.sh" 10 | -------------------------------------------------------------------------------- /roles/kafka/tasks/stop.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Check if kafka is running or not 3 | include: service.yml stage='before stop action' 4 | 5 | - name: Stop kafka service 6 | 
shell: "{{ KAFKA_STOP_COMMAND }}" 7 | ignore_errors: yes 8 | when: kafka_service_status.rc == 0 9 | notify: 10 | - kafka_service_status report 11 | 12 | -------------------------------------------------------------------------------- /experimental/plays/ec2_terminate.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Terminate instances 3 | hosts: localhost 4 | connection: local 5 | gather_facts: True 6 | tasks: 7 | - ec2: 8 | state: 'absent' 9 | region: us-west-2 10 | instance_ids: "{{ hostvars[item].ec2_id }}" 11 | with_items: "{{ groups['tag_class_kafka'] }}" 12 | -------------------------------------------------------------------------------- /roles/kafka/tasks/start.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Check if kafka is running or not 3 | include: service.yml stage='before start action' 4 | 5 | - name: Start kafka service 6 | shell: "{{ KAFKA_START_COMMAND }}" 7 | ignore_errors: yes 8 | when: kafka_service_status.rc != 0 9 | notify: 10 | - kafka_service_status report 11 | 12 | -------------------------------------------------------------------------------- /roles/kafka/tasks/service.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Check if kafka service is running 3 | shell: ps aux | grep kafka | grep -v grep 4 | ignore_errors: yes 5 | register: kafka_service_status 6 | 7 | - name: Kafka service (running or stopped) report 8 | debug: 9 | msg: "{{ stage }} | Kafka server is {% if kafka_service_status.rc != 0 %}not {% endif %}running" 10 | -------------------------------------------------------------------------------- /roles/vw/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: Check if vw is installed 2 | stat: 3 | path: "{{ VW_HOME }}" 4 | register: vw 5 | tags: ['test', 'start', 'stop', 'install', 'uninstall', 'info'] 6 | 7 | - name: Install Vowpal Wabbit 8 | include: install.yml 9 | tags: install 10 | when: vw.stat.exists == False 11 | 12 | - name: Testing apt 13 | include: test.yml 14 | tags: test -------------------------------------------------------------------------------- /experimental/ansible-example.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | roles_path = ./roles 3 | host_key_checking = False 4 | remote_user=ubuntu 5 | private_key_file=~/.ssh/aws/TA-AWS-VA_free.pem 6 | 7 | # fact caching 8 | gathering = smart 9 | fact_caching = jsonfile 10 | fact_caching_connection = ~/.ansible/cache 11 | fact_caching_timeout = 86400 12 | 13 | # fact_caching_timeout is in seconds 14 | [privilege_escalation] 15 | become = True 16 | -------------------------------------------------------------------------------- /ansible_example.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | host_key_checking = False 3 | private_key_file= 4 | remote_user=ubuntu 5 | log_path=/var/log/ansible.log 6 | roles_path= 7 | 8 | # fact caching 9 | gathering = smart 10 | fact_caching = jsonfile 11 | fact_caching_connection = ~/.ansible/cache 12 | fact_caching_timeout = 86400 13 | 14 | [privilege_escalation] 15 | become = True 16 | -------------------------------------------------------------------------------- /experimental/kafka/config_changes.yml: -------------------------------------------------------------------------------- 1 | - name: notify systemd of config 
changes 2 | service: name=kafka state=reloaded enabled=yes 3 | when: systemd_config.changed == True 4 | 5 | # not sure if the above will reload the newly made service file or not 6 | # as a last option (test and see), you can use this one: 7 | - name: notify systemd of config changes 8 | command: systemctl daemon-reload 9 | when: systemd_config.changed == True -------------------------------------------------------------------------------- /experimental/testplay.yml: -------------------------------------------------------------------------------- 1 | - hosts: zookeeper 2 | gather_facts: true 3 | tasks: 4 | # - debug: 5 | # msg: broker.id={% for host in groups['kafka'] %}{% if hostvars[host]['ansible_nodename'] == ansible_hostname %}{{loop.index}}{% endif %}{% endfor %} 6 | - debug: 7 | msg: "zs={% for host in groups['zookeeper'] %}{{ hostvars[host]['ansible_eth0']['ipv4']['address'] }}:2181{% if not loop.last %},{% endif %}{% endfor %}" 8 | -------------------------------------------------------------------------------- /roles/zookeeper/tasks/uninstall.yml: -------------------------------------------------------------------------------- 1 | - name: Delete zookeeper 2 | file: 3 | path: "{{ ZOOKEEPER_HOME }}" 4 | state: absent 5 | owner: ubuntu 6 | 7 | - name: Delete zookeeper data dir 8 | file: 9 | path: "{{ ZOOKEEPER_DATA_DIR }}" 10 | state: absent 11 | owner: ubuntu 12 | 13 | - name: Delete zookeeper log dir 14 | file: 15 | path: "{{ ZOOKEEPER_LOG_DIR }}" 16 | state: absent 17 | owner: ubuntu 18 | -------------------------------------------------------------------------------- /experimental/kafka/kafka.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kafka Daemon 3 | 4 | [Service] 5 | Type=simple 6 | User={{ UNIX_KAFKA_USER }} 7 | Group={{ UNIX_KAFKA_GROUP }} 8 | Restart=on-failure 9 | Environment="JMX_PORT={{ JMX_PORT }}" 10 | ExecStart={{ KAFKA_HOME }}/bin/kafka-server-start.sh {{ KAFKA_HOME }}/config/server.properties 11 | ExecStop={{ KAFKA_HOME }}/bin/kafka-server-stop.sh 12 | 13 | [Install] 14 | WantedBy=multi-user.target 15 | -------------------------------------------------------------------------------- /experimental/plays/testplay.yml: -------------------------------------------------------------------------------- 1 | - hosts: zookeeper 2 | gather_facts: true 3 | tasks: 4 | # - debug: 5 | # msg: broker.id={% for host in groups['kafka'] %}{% if hostvars[host]['ansible_nodename'] == ansible_hostname %}{{loop.index}}{% endif %}{% endfor %} 6 | - debug: 7 | msg: "zs={% for host in groups['zookeeper'] %}{{ hostvars[host]['ansible_eth0']['ipv4']['address'] }}:2181{% if not loop.last %},{% endif %}{% endfor %}" 8 | -------------------------------------------------------------------------------- /experimental/kafka/service_play.yml: -------------------------------------------------------------------------------- 1 | - name: create kafka systemd service 2 | template: src=kafka.service.j2 dest={{kafka_systemd_service}} mode=644 3 | register: systemd_config 4 | 5 | 6 | # - name: start kafka 7 | # service: name=kafka state=started enabled=yes 8 | 9 | # - name: restart kafka 10 | # service: name=kafka state=restarted enabled=yes 11 | 12 | # - name: reload kafka service 13 | # service: name=kafka state=reloaded enabled=yes -------------------------------------------------------------------------------- /roles/zookeeper/tasks/configure.yml: -------------------------------------------------------------------------------- 1 | - 
name: Configure zoo.cfg 2 | template: 3 | src: zoo_sample.cfg.j2 4 | dest: "{{ INSTALLATION_DIR }}/zookeeper/conf/zoo.cfg" 5 | 6 | - name: Configure log4j.properties 7 | template: 8 | src: log4j.properties.j2 9 | dest: "{{ INSTALLATION_DIR }}/zookeeper/conf/log4j.properties" 10 | 11 | - name: Configure myid 12 | template: 13 | src: myid.j2 14 | dest: "{{ ZOOKEEPER_DATA_DIR }}/myid" 15 | -------------------------------------------------------------------------------- /roles/kafka/handlers/service_module_example.yml: -------------------------------------------------------------------------------- 1 | #- name: notify systemd of config changes 2 | # service: name=kafka state=reloaded enabled=yes 3 | # when: systemd_config.changed == True 4 | 5 | # not sure if the above will reload the newly made service file or not 6 | # # as a last option (test and see), you can use this one: 7 | #- name: notify systemd of config changes 8 | # command: systemctl daemon-reload 9 | # when: systemd_config.changed == True 10 | -------------------------------------------------------------------------------- /experimental/kafka/configure_w_service.yml: -------------------------------------------------------------------------------- 1 | - name: Configure server.properties 2 | template: 3 | src: server.properties.j2 4 | dest: "{{ KAFKA_HOME }}/config/server.properties" 5 | 6 | - name: Configure kafka-server-start.sh 7 | template: 8 | src: kafka-server-start.sh.j2 9 | dest: "{{ KAFKA_HOME }}/bin/kafka-server-start.sh" 10 | 11 | #- name: Configure kafka.service.j2 12 | # template: 13 | # src: kafka.service.j2 14 | # dest: /etc/systemd/kafka.service 15 | # mode: 644 16 | -------------------------------------------------------------------------------- /roles/ec2/tasks/start.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Get stopped EC2 instances for the given tag 3 | ec2_remote_facts: 4 | region: "{{ region }}" 5 | filters: 6 | "tag:Name": "{{ tag_key_vals.Name }}" 7 | instance-state-name: stopped 8 | register: ec2_info 9 | 10 | - name: Start EC2 instances specified by the given tag 11 | ec2: 12 | state: running 13 | region: "{{ region }}" 14 | wait: True 15 | instance_ids: "{{ item.id }}" 16 | with_items: "{{ ec2_info.instances }}" 17 | -------------------------------------------------------------------------------- /roles/ec2/tasks/stop.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Get running EC2 instances for the given tag 3 | ec2_remote_facts: 4 | region: "{{ region }}" 5 | filters: 6 | "tag:Name": "{{ tag_key_vals.Name }}" 7 | instance-state-name: running 8 | register: ec2_info 9 | 10 | - name: Stop EC2 instances specified by the given tag 11 | ec2: 12 | state: stopped 13 | region: "{{ region }}" 14 | wait: True 15 | instance_ids: "{{ item.id }}" 16 | with_items: "{{ ec2_info.instances }}" 17 | -------------------------------------------------------------------------------- /roles/ec2/tasks/terminate.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Disassociate and release EIPs associated with instances 3 | ec2_eip: 4 | state: absent 5 | region: "{{ region }}" 6 | device_id: "{{ hostvars[item].ec2_id }}" 7 | release_on_disassociation: True 8 | with_items: "{{ groups[cluster_name] }}" 9 | 10 | - name: Terminate instances 11 | ec2: 12 | state: 'absent' 13 | region: "{{ region }}" 14 | instance_ids: "{{ hostvars[item].ec2_id }}" 15 | with_items: 
"{{ groups[cluster_name] }}" 16 | -------------------------------------------------------------------------------- /vw.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Creating host group vowpal wabbit from dynamic inventory 3 | hosts: localhost 4 | connection: local 5 | tags: ['info', 'install', 'start', 'stop', 'uninstall'] 6 | vars: 7 | vw_tag_name: "tag_{{ vw_tag | replace('-', '_') }}" 8 | tasks: 9 | - add_host: name={{ item }} groups=vw 10 | with_items: "{{ groups[vw_tag_name] }}" 11 | 12 | - name: Execute Vowpal Wabbit roles 13 | hosts: vw 14 | user: ubuntu 15 | become: true 16 | become_method: sudo 17 | roles: 18 | - role: vw 19 | -------------------------------------------------------------------------------- /zookeeper.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Creating host group zookeeper from dynamic inventory 3 | hosts: localhost 4 | connection: local 5 | tags: ['info', 'install', 'start', 'stop', 'uninstall'] 6 | vars: 7 | zookeeper_tag_name: "tag_{{ zookeeper_tag | replace('-', '_') }}" 8 | tasks: 9 | - add_host: name={{ item }} groups=zookeeper 10 | with_items: "{{ groups[zookeeper_tag_name] }}" 11 | 12 | - name: Execute Zookeeper roles 13 | hosts: zookeeper 14 | user: ubuntu 15 | become: true 16 | become_method: sudo 17 | roles: 18 | - role: zookeeper 19 | -------------------------------------------------------------------------------- /experimental/kafka/kafka.service.example.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kafka Daemon 3 | After=zookeeper.service 4 | 5 | [Service] 6 | Type=simple 7 | User={{unix_kafka_user}} 8 | Group={{unix_kafka_group}} 9 | LimitNOFILE=32768 10 | Restart=on-failure 11 | Environment="KAFKA_HEAP_OPTS={{kafka_heap_opts}}" 12 | {% if jmx_enabled %}Environment="JMX_PORT={{ kafka_jmx_port }}"{% endif %} 13 | ExecStart={{generic_kafka_home}}/bin/kafka-server-start.sh {{kafka_conf_dir}}/server.properties 14 | ExecStop={{generic_kafka_home}}/bin/kafka-server-stop.sh 15 | 16 | 17 | [Install] 18 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /roles/zookeeper/vars/main.yml: -------------------------------------------------------------------------------- 1 | ZOOKEEPER_VERSION: 3.4.9 2 | ZOOKEEPER_URL: http://www.us.apache.org/dist/zookeeper/zookeeper-{{ ZOOKEEPER_VERSION }}/zookeeper-{{ ZOOKEEPER_VERSION }}.tar.gz 3 | ZOOKEEPER_TAR: zookeeper-{{ ZOOKEEPER_VERSION }}.tar.gz 4 | INSTALLATION_DIR: /usr/local 5 | ZOOKEEPER_DATA_DIR: /var/lib/zookeeper 6 | ZOOKEEPER_LOG_DIR: /var/log/zookeeper 7 | ZOOKEEPER_HOME: "{{ INSTALLATION_DIR }}/zookeeper" 8 | 9 | # Zookeeper service commands 10 | ZOOKEEPER_START_COMMAND: "{{ ZOOKEEPER_HOME }}/bin/zkServer.sh start" 11 | ZOOKEEPER_STOP_COMMAND: "{{ ZOOKEEPER_HOME }}/bin/zkServer.sh stop" 12 | -------------------------------------------------------------------------------- /experimental/plays/ec2_create.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: testing EC2 module 3 | hosts: localhost 4 | gather_facts: False 5 | connection: local 6 | user: ronak 7 | tasks: 8 | - name: Launch instances 9 | ec2: 10 | key_name: ronak-nathani 11 | instance_type: t2.large 12 | image: ami-62e01e02 13 | region: us-west-2 14 | wait: yes 15 | group: ronak-ansible 16 | count: 3 17 | vpc_subnet_id: subnet-3a78835f 18 | assign_public_ip: 
yes 19 | instance_tags: 20 | Name: ronak-ansible2 21 | config_manager: zookeeper 22 | queue: kafka 23 | -------------------------------------------------------------------------------- /roles/ec2/tasks/launch.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Launch EC2 instances 3 | ec2: 4 | key_name: "{{ key_pair }}" 5 | instance_type: "{{ instance_type }}" 6 | image: "{{ ami_id }}" 7 | region: "{{ region }}" 8 | wait: yes 9 | group_id: "{{ security_group_id }}" 10 | count: "{{ num_instances }}" 11 | vpc_subnet_id: "{{ subnet_id }}" 12 | assign_public_ip: yes 13 | instance_tags: 14 | "{{ tag_key_vals }}" 15 | register: ec2 16 | 17 | - name: associate new elastic IPs with each of the instances 18 | ec2_eip: 19 | device_id: "{{ item }}" 20 | region: "{{ region }}" 21 | with_items: "{{ ec2.instance_ids }}" 22 | -------------------------------------------------------------------------------- /roles/zookeeper/tasks/service.yml: -------------------------------------------------------------------------------- 1 | - name: Check if zookeeper service is running 2 | shell: ps aux | grep zookeeper | grep -v grep 3 | ignore_errors: yes 4 | register: zoo_service_status 5 | 6 | - name: Zookeeper service (running or stopped) report 7 | debug: 8 | msg: "{{ stage }} | Zookeeper server is {% if zoo_service_status.rc != 0 %}not {% endif %}running" 9 | 10 | - name: Check server mode - leader or follower 11 | shell: echo srvr | nc localhost 2181 | grep Mode 12 | ignore_errors: yes 13 | register: server_mode 14 | 15 | - name: Zookeeper server_mode report 16 | debug: 17 | msg: "{{ server_mode.stdout }}" 18 | when: zoo_service_status.rc == 0 -------------------------------------------------------------------------------- /roles/ec2/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Fail play if the cluster tag Name is not defined 3 | fail: 4 | msg: "Specify 'Name' for the cluster in tag_key_vals variable" 5 | when: tag_key_vals.Name is undefined 6 | tags: ['launch'] 7 | 8 | - name: Launch EC2 instances 9 | include: launch.yml 10 | tags: ['launch'] 11 | 12 | - name: Start stopped EC2 instances 13 | include: start.yml 14 | tags: ['start'] 15 | 16 | - name: Stop EC2 instances 17 | include: stop.yml 18 | tags: ['stop'] 19 | 20 | - name: Terminate EC2 instances 21 | include: terminate.yml 22 | tags: ['terminate'] 23 | 24 | - name: Describe EC2 instances info 25 | include: describe.yml 26 | tags: ['describe'] 27 | -------------------------------------------------------------------------------- /roles/kafka/tasks/install.yml: -------------------------------------------------------------------------------- 1 | - name: Download Kafka Tar 2 | get_url: 3 | url: "{{ KAFKA_URL }}" 4 | dest: "{{ INSTALLATION_DIR }}" 5 | mode: 0644 6 | 7 | - name: Unpack Kafka tar 8 | shell: tar zxvf {{ INSTALLATION_DIR }}/{{ KAFKA_TAR }} -C {{ INSTALLATION_DIR }} 9 | 10 | - name: Remove Kafka tar 11 | file: 12 | path: "{{ INSTALLATION_DIR }}/{{ KAFKA_TAR }}" 13 | owner: ubuntu 14 | state: absent 15 | 16 | - name: Move {{ KAFKA_SOURCE_FOLDER }} to kafka 17 | shell: mv {{ INSTALLATION_DIR }}/{{ KAFKA_SOURCE_FOLDER }} {{ INSTALLATION_DIR }}/kafka 18 | 19 | - name: Create Kafka log directory 20 | file: 21 | path: "{{ KAFKA_LOG_DIR }}" 22 | state: directory 23 | owner: ubuntu 24 | mode: 0755 25 | -------------------------------------------------------------------------------- /conf/Dockerfile: 
-------------------------------------------------------------------------------- 1 | ######################################################## 2 | # Dockerfile for ansible-playbook 3 | # Based on ubuntu 4 | ######################################################## 5 | 6 | FROM ubuntu 7 | 8 | MAINTAINER Ronak Nathani 9 | 10 | RUN apt-get update \ 11 | && apt-get install -y software-properties-common \ 12 | && apt-add-repository ppa:ansible/ansible \ 13 | && apt-get update \ 14 | && apt-get install -y ansible \ 15 | && apt-get install -y python-pip \ 16 | && apt-get install -y vim \ 17 | && apt-get install -y git 18 | 19 | RUN pip install boto 20 | 21 | COPY . /root/ansible-playbook 22 | 23 | RUN rm /etc/ansible/hosts 24 | RUN mkdir /etc/ansible/hosts/ 25 | 26 | COPY ansible_example.cfg /etc/ansible/ansible.cfg 27 | COPY ec2.py /etc/ansible/hosts/ 28 | COPY ec2.ini /etc/ansible/hosts/ 29 | -------------------------------------------------------------------------------- /roles/kafka/vars/main.yml: -------------------------------------------------------------------------------- 1 | KAFKA_VERSION: 0.10.1.1 2 | KAFKA_SCALA_VERSION: "2.11" 3 | KAFKA_URL: "http://www.us.apache.org/dist/kafka/{{ KAFKA_VERSION }}/kafka_{{ KAFKA_SCALA_VERSION }}-{{ KAFKA_VERSION }}.tgz" 4 | KAFKA_TAR: kafka_{{ KAFKA_SCALA_VERSION }}-{{ KAFKA_VERSION }}.tgz 5 | KAFKA_SOURCE_FOLDER: kafka_{{ KAFKA_SCALA_VERSION }}-{{ KAFKA_VERSION }} 6 | INSTALLATION_DIR: /usr/local 7 | KAFKA_HOME: "{{ INSTALLATION_DIR }}/kafka" 8 | KAFKA_LOG_DIR: /var/log/kafka 9 | 10 | # Kafka service commands 11 | KAFKA_STOP_COMMAND: "{{ KAFKA_HOME }}/bin/kafka-server-stop.sh; sleep 20" 12 | KAFKA_START_COMMAND: "{{ KAFKA_HOME }}/bin/kafka-server-start.sh -daemon {{ KAFKA_HOME }}/config/server.properties" 13 | 14 | # Unix user and group 15 | UNIX_KAFKA_USER: ubuntu 16 | UNIX_KAFKA_GROUP: ubuntu 17 | 18 | JMX_ENABLED: true 19 | JMX_PORT: 9999 20 | -------------------------------------------------------------------------------- /experimental/Dockerfile: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | # Dockerfile for ansible-playbook 3 | # Based on debian 4 | ######################################################## 5 | 6 | FROM debian:jessie 7 | 8 | MAINTAINER Ronak Nathani 9 | 10 | RUN apt-get update \ 11 | # && apt-get install -y software-properties-common \ 12 | # && apt-add-repository ppa:ansible/ansible \ 13 | # && apt-get update \ 14 | # && apt-get install -y ansible \ 15 | # && apt-get install -y python-pip \ 16 | # && apt-get install -y vim \ 17 | # && apt-get install -y git 18 | 19 | RUN pip install boto 20 | 21 | RUN git clone https://github.com/InsightDataScience/ansible-playbook.git /root/ansible-playbook 22 | 23 | RUN mkdir -p /etc/ansible/hosts 24 | 25 | COPY ansible_example.cfg /etc/ansible/ansible.cfg 26 | COPY ec2.py /etc/ansible/hosts/ 27 | COPY ec2.ini /etc/ansible/hosts/ 28 | -------------------------------------------------------------------------------- /experimental/example-aws.inv: -------------------------------------------------------------------------------- 1 | aws1 ansible_host=54.174.190.147 2 | aws2 ansible_host=192.168.33.31 3 | aws3 ansible_host=192.168.33.32 4 | aws4 ansible_host=192.168.33.33 5 | #k1 ansible_host=192.168.33.41 ansible_ssh_private_key_file=~/dev/machines/.vagrant/machines/k1/virtualbox/private_key 6 | #k2 ansible_host=192.168.33.42 ansible_ssh_private_key_file=~/dev/machines/.vagrant/machines/k2/virtualbox/private_key 7 | #k3 
ansible_host=192.168.33.43 ansible_ssh_private_key_file=~/dev/machines/.vagrant/machines/k3/virtualbox/private_key 8 | [all:vars] 9 | ansible_user=ubuntu 10 | ansible_ssh_private_key_file=~/.ssh/aws/TA-AWS-VA_free.pem 11 | [test_group] 12 | sw2 13 | sw3 14 | [spark_master] 15 | sm 16 | [spark_workers] 17 | sw[1:3] 18 | [spark_nodes:children] 19 | spark_master 20 | spark_workers 21 | [zoo_hosts] 22 | aws1 23 | #k1 24 | #k2 25 | #k3 26 | [kafka_hosts] 27 | aws1 28 | #sw[1:3] -------------------------------------------------------------------------------- /kafka.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Creating host group zookeeper from dynamic inventory 3 | hosts: localhost 4 | connection: local 5 | tags: ['info', 'install', 'start', 'stop', 'uninstall'] 6 | vars: 7 | zookeeper_tag_name: "tag_{{ zookeeper_tag | replace('-', '_') }}" 8 | tasks: 9 | - add_host: name={{ item }} groups=zookeeper 10 | with_items: "{{ groups[zookeeper_tag_name] }}" 11 | 12 | - name: Creating host group kafka from dynamic inventory 13 | hosts: localhost 14 | connection: local 15 | tags: ['info', 'install', 'start', 'stop', 'uninstall'] 16 | vars: 17 | kafka_tag_name: "tag_{{ kafka_tag | replace('-', '_') }}" 18 | tasks: 19 | - add_host: name={{ item }} groups=kafka 20 | with_items: "{{ groups[kafka_tag_name] }}" 21 | 22 | - name: Execute Kafka roles 23 | hosts: kafka 24 | user: ubuntu 25 | become: true 26 | become_method: sudo 27 | roles: 28 | - role: kafka 29 | -------------------------------------------------------------------------------- /roles/zookeeper/tasks/install.yml: -------------------------------------------------------------------------------- 1 | - name: Download Zookeeper Tar 2 | get_url: 3 | url: "{{ ZOOKEEPER_URL }}" 4 | dest: "{{ INSTALLATION_DIR }}" 5 | mode: 0644 6 | 7 | - name: Unpack Zookeeper tar 8 | shell: tar zxvf {{ INSTALLATION_DIR }}/{{ ZOOKEEPER_TAR }} -C {{ INSTALLATION_DIR }} 9 | 10 | - name: Remove Zookeeper tar 11 | file: 12 | path: "{{ INSTALLATION_DIR }}/{{ ZOOKEEPER_TAR }}" 13 | owner: ubuntu 14 | state: absent 15 | 16 | - name: Move zookeeper-{{ ZOOKEEPER_VERSION }} to zookeeper 17 | shell: mv {{ INSTALLATION_DIR }}/zookeeper-{{ ZOOKEEPER_VERSION }} {{ INSTALLATION_DIR }}/zookeeper 18 | 19 | - name: Create data directory 20 | file: 21 | path: "{{ ZOOKEEPER_DATA_DIR }}" 22 | state: directory 23 | owner: ubuntu 24 | mode: 0755 25 | 26 | - name: Create Zookeeper log directory 27 | file: 28 | path: "{{ ZOOKEEPER_LOG_DIR }}" 29 | state: directory 30 | owner: ubuntu 31 | mode: 0755 32 | -------------------------------------------------------------------------------- /roles/kafka/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: Check if kafka is installed 2 | stat: 3 | path: "{{ KAFKA_HOME }}" 4 | register: kafka 5 | tags: ['start', 'stop', 'install', 'uninstall', 'info'] 6 | 7 | - name: Install Kafka 8 | include: install.yml 9 | tags: install 10 | when: kafka.stat.exists == False 11 | 12 | - name: Configure Kafka 13 | include: configure.yml 14 | tags: install 15 | when: kafka.stat.exists == False 16 | 17 | - name: Fail play if Kafka is not installed 18 | fail: 19 | msg: "Kafka is not installed" 20 | when: kafka.stat.exists == False 21 | tags: ['start', 'stop', 'uninstall', 'info'] 22 | 23 | - name: Start kafka 24 | include: start.yml 25 | tags: start 26 | 27 | - name: Stop kafka 28 | include: stop.yml 29 | tags: ['stop', 'uninstall'] 30 | 31 | - name: Uninstall 
kafka 32 | include: uninstall.yml 33 | tags: uninstall 34 | when: kafka.stat.exists 35 | 36 | - name: Kafka info 37 | include: info.yml 38 | tags: info 39 | -------------------------------------------------------------------------------- /roles/vw/tasks/install.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Ensure vw's dependencies are installed 3 | become: yes 4 | apt: "name={{ item }} state=installed" 5 | with_items: 6 | - make 7 | - libboost-program-options-dev 8 | - zlib1g-dev 9 | - libboost-python-dev 10 | - clang 11 | register: vw_dependencies_deb 12 | when: ansible_os_family == 'Debian' 13 | 14 | - name: Clone the vw repo 15 | become: no 16 | git: 17 | repo: 'git://github.com/JohnLangford/vowpal_wabbit.git' 18 | dest: "{{ BUILD_DIR }}" 19 | register: vw_cloned 20 | when: vw_dependencies_deb|success 21 | 22 | - name: Build vw 23 | become: no 24 | command: make CXX=clang++ prefix={{ INSTALL_DIR }} {{ item }} 25 | args: 26 | chdir: "{{ BUILD_DIR }}" 27 | with_items: 28 | - 29 | - test 30 | when: vw_cloned|success 31 | register: vw_built 32 | 33 | - name: Install vw 34 | become: yes 35 | command: make CXX=clang++ prefix={{ INSTALL_DIR }} install 36 | args: 37 | chdir: "{{ BUILD_DIR }}" 38 | when: vw_built|success -------------------------------------------------------------------------------- /roles/zookeeper/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: Check if zookeeper is installed 2 | stat: 3 | path: "{{ ZOOKEEPER_HOME }}" 4 | register: zoo 5 | tags: ['start', 'stop', 'install', 'uninstall', 'info'] 6 | 7 | - name: Install Zookeeper 8 | include: install.yml 9 | tags: install 10 | when: zoo.stat.exists == False 11 | 12 | - name: Configure Zookeeper 13 | include: configure.yml 14 | tags: install 15 | when: zoo.stat.exists == False 16 | 17 | - name: Fail play if Zookeeper is not installed 18 | fail: 19 | msg: "Zookeeper is not installed" 20 | when: zoo.stat.exists == False 21 | tags: ['start', 'stop', 'uninstall', 'info'] 22 | 23 | - name: Start zookeeper 24 | include: start.yml 25 | tags: start 26 | 27 | - name: Stop zookeeper 28 | include: stop.yml 29 | tags: ['stop', 'uninstall'] 30 | 31 | - name: Uninstall zookeeper 32 | include: uninstall.yml 33 | tags: uninstall 34 | when: zoo.stat.exists 35 | 36 | - name: Zookeeper info 37 | include: info.yml 38 | tags: info 39 | -------------------------------------------------------------------------------- /roles/ec2/tasks/describe.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: EC2 Remote facts 3 | ec2_remote_facts: 4 | region: "{{ region }}" 5 | filters: 6 | "tag:Name": "{{ tag_key_vals.Name }}" 7 | register: ec2_info 8 | 9 | - name: Gather EC2 info 10 | set_fact: 11 | instance_summary: 12 | public_ip: "{{ item.public_ip_address }}" 13 | instance_tags: "{{ item.tags }}" 14 | public_dns: "{{ item.public_dns_name }}" 15 | private_ip: "{{ item.private_ip_address }}" 16 | region: "{{ item.region }}" 17 | state: "{{ item.state }}" 18 | key_name: "{{ item.key_name }}" 19 | security_group: "{{ item.groups }}" 20 | launch_time: "{{ item.launch_time }}" 21 | vpc: "{{ item.vpc_id }}" 22 | with_items: 23 | "{{ ec2_info.instances }}" 24 | register: 25 | ec2_set_facts 26 | 27 | - set_fact: summary="{{ ec2_set_facts.results | map(attribute='ansible_facts.instance_summary') | list }}" 28 | 29 | - name: "{{ tag_key_vals.Name }} cluster information" 30 | debug: 31 | msg: "{{ summary }}" 32 | 
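# Example invocation (assuming a vars file like example_ec2_vars.yml): 33 | # ansible-playbook ec2.yml --extra-vars "vars_file=./example_ec2_vars.yml" --tags describe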
-------------------------------------------------------------------------------- /roles/zookeeper/templates/zoo_sample.cfg.j2: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=2000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=10 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=5 9 | # the directory where the snapshot is stored. 10 | # do not use /tmp for storage, /tmp here is just 11 | # example sakes. 12 | dataDir={{ ZOOKEEPER_DATA_DIR }} 13 | # the port at which the clients will connect 14 | clientPort=2181 15 | {% for host in groups['zookeeper'] %} 16 | server.{{ loop.index }}={{ hostvars[host]['ansible_eth0']['ipv4']['address'] }}:2888:3888 17 | {% endfor %} 18 | # the maximum number of client connections. 19 | # increase this if you need to handle more clients 20 | #maxClientCnxns=60 21 | # 22 | # Be sure to read the maintenance section of the 23 | # administrator guide before turning on autopurge. 24 | # 25 | # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance 26 | # 27 | # The number of snapshots to retain in dataDir 28 | #autopurge.snapRetainCount=3 29 | # Purge task interval in hours 30 | # Set to "0" to disable auto purge feature 31 | #autopurge.purgeInterval=1 32 | -------------------------------------------------------------------------------- /roles/kafka/templates/kafka-server-start.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
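# The JMX_PORT export below (defaulting to 9999, the JMX_PORT value in roles/kafka/vars/main.yml) ensures the broker exposes JMX metrics for monitoring even when the caller's environment does not set a port.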
16 | 17 | export JMX_PORT=${JMX_PORT:-9999} 18 | 19 | if [ $# -lt 1 ]; 20 | then 21 | echo "USAGE: $0 [-daemon] server.properties [--override property=value]*" 22 | exit 1 23 | fi 24 | base_dir=$(dirname $0) 25 | 26 | if [ "x$KAFKA_LOG4J_OPTS" = "x" ]; then 27 | export KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:$base_dir/../config/log4j.properties" 28 | fi 29 | 30 | if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then 31 | export KAFKA_HEAP_OPTS="-Xmx1G -Xms1G" 32 | fi 33 | 34 | EXTRA_ARGS=${EXTRA_ARGS-'-name kafkaServer -loggc'} 35 | 36 | COMMAND=$1 37 | case $COMMAND in 38 | -daemon) 39 | EXTRA_ARGS="-daemon "$EXTRA_ARGS 40 | shift 41 | ;; 42 | *) 43 | ;; 44 | esac 45 | 46 | exec $base_dir/kafka-run-class.sh $EXTRA_ARGS kafka.Kafka "$@" 47 | -------------------------------------------------------------------------------- /roles/zookeeper/templates/log4j.properties.j2: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | zookeeper.root.logger=INFO, CONSOLE 3 | zookeeper.console.threshold=INFO 4 | zookeeper.log.dir={{ ZOOKEEPER_LOG_DIR }} 5 | zookeeper.log.file=zookeeper.log 6 | zookeeper.log.threshold=DEBUG 7 | zookeeper.tracelog.dir=. 8 | zookeeper.tracelog.file=zookeeper_trace.log 9 | 10 | # 11 | # ZooKeeper Logging Configuration 12 | # 13 | 14 | # Format is "<default threshold> (, <appender>)+ 15 | 16 | # DEFAULT: console appender only 17 | log4j.rootLogger=${zookeeper.root.logger} 18 | 19 | # Example with rolling log file 20 | #log4j.rootLogger=DEBUG, CONSOLE, ROLLINGFILE 21 | 22 | # Example with rolling log file and tracing 23 | #log4j.rootLogger=TRACE, CONSOLE, ROLLINGFILE, TRACEFILE 24 | 25 | # 26 | # Log INFO level and above messages to the console 27 | # 28 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 29 | log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold} 30 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 31 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 32 | 33 | # 34 | # Add ROLLINGFILE to rootLogger to get log file output 35 | # Log DEBUG level and above messages to a log file 36 | log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender 37 | log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold} 38 | log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file} 39 | 40 | # Max log file size of 10MB 41 | log4j.appender.ROLLINGFILE.MaxFileSize=10MB 42 | # uncomment the next line to limit number of backup files 43 | #log4j.appender.ROLLINGFILE.MaxBackupIndex=10 44 | 45 | log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout 46 | log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 47 | 48 | 49 | # 50 | # Add TRACEFILE to rootLogger to get log file output 51 | # Log DEBUG level and above messages to a log file 52 | log4j.appender.TRACEFILE=org.apache.log4j.FileAppender 53 | log4j.appender.TRACEFILE.Threshold=TRACE 54 | log4j.appender.TRACEFILE.File=${zookeeper.tracelog.dir}/${zookeeper.tracelog.file} 55 | 56 | log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout 57 | ### Notice we are including log4j's NDC here (%x) 58 | log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L][%x] - %m%n 59 | -------------------------------------------------------------------------------- /roles/kafka/templates/server.properties.j2: 
-------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # see kafka.server.KafkaConfig for additional details and defaults 17 | 18 | ############################# Server Basics ############################# 19 | 20 | # The id of the broker. This must be set to a unique integer for each broker. 21 | broker.id={% for host in groups['kafka'] %}{% if hostvars[host]['ansible_nodename'] == ansible_hostname %}{{loop.index}}{% endif %}{% endfor %} 22 | 23 | # Switch to enable topic deletion or not, default value is false 24 | #delete.topic.enable=true 25 | 26 | ############################# Socket Server Settings ############################# 27 | 28 | # The address the socket server listens on. It will get the value returned from 29 | # java.net.InetAddress.getCanonicalHostName() if not configured. 30 | # FORMAT: 31 | # listeners = security_protocol://host_name:port 32 | # EXAMPLE: 33 | # listeners = PLAINTEXT://your.host.name:9092 34 | #listeners=PLAINTEXT://:9092 35 | 36 | # Hostname and port the broker will advertise to producers and consumers. If not set, 37 | # it uses the value for "listeners" if configured. Otherwise, it will use the value 38 | # returned from java.net.InetAddress.getCanonicalHostName(). 39 | advertised.listeners=PLAINTEXT://{{ ansible_hostname }}:9092 40 | 41 | # The number of threads handling network requests 42 | num.network.threads=3 43 | 44 | # The number of threads doing disk I/O 45 | num.io.threads=8 46 | 47 | # The send buffer (SO_SNDBUF) used by the socket server 48 | socket.send.buffer.bytes=102400 49 | 50 | # The receive buffer (SO_RCVBUF) used by the socket server 51 | socket.receive.buffer.bytes=102400 52 | 53 | # The maximum size of a request that the socket server will accept (protection against OOM) 54 | socket.request.max.bytes=104857600 55 | 56 | 57 | ############################# Log Basics ############################# 58 | 59 | # A comma separated list of directories under which to store log files 60 | log.dirs={{ KAFKA_LOG_DIR }} 61 | 62 | # The default number of log partitions per topic. More partitions allow greater 63 | # parallelism for consumption, but this will also result in more files across 64 | # the brokers. 65 | num.partitions=1 66 | 67 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 68 | # This value is recommended to be increased for installations with data dirs located in RAID array. 
69 | num.recovery.threads.per.data.dir=1 70 | 71 | ############################# Log Flush Policy ############################# 72 | 73 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 74 | # the OS cache lazily. The following configurations control the flush of data to disk. 75 | # There are a few important trade-offs here: 76 | # 1. Durability: Unflushed data may be lost if you are not using replication. 77 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 78 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. 79 | # The settings below allow one to configure the flush policy to flush data after a period of time or 80 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 81 | 82 | # The number of messages to accept before forcing a flush of data to disk 83 | #log.flush.interval.messages=10000 84 | 85 | # The maximum amount of time a message can sit in a log before we force a flush 86 | #log.flush.interval.ms=1000 87 | 88 | ############################# Log Retention Policy ############################# 89 | 90 | # The following configurations control the disposal of log segments. The policy can 91 | # be set to delete segments after a period of time, or after a given size has accumulated. 92 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 93 | # from the end of the log. 94 | 95 | # The minimum age of a log file to be eligible for deletion 96 | log.retention.hours=168 97 | 98 | # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining 99 | # segments don't drop below log.retention.bytes. 100 | #log.retention.bytes=1073741824 101 | 102 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 103 | log.segment.bytes=1073741824 104 | 105 | # The interval at which log segments are checked to see if they can be deleted according 106 | # to the retention policies 107 | log.retention.check.interval.ms=300000 108 | 109 | ############################# Zookeeper ############################# 110 | 111 | # Zookeeper connection string (see zookeeper docs for details). 112 | # This is a comma separated list of host:port pairs, each corresponding to a zk 113 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 114 | # You can also append an optional chroot string to the urls to specify the 115 | # root directory for all kafka znodes. 116 | zookeeper.connect={% for host in groups['zookeeper'] %}{{ hostvars[host]['ansible_eth0']['ipv4']['address'] }}:2181{% if not loop.last %},{% endif %}{% endfor %} 117 | 118 | # Timeout in ms for connecting to zookeeper 119 | zookeeper.connection.timeout.ms=6000 120 | 121 | 122 | -------------------------------------------------------------------------------- /ec2.ini: -------------------------------------------------------------------------------- 1 | # Ansible EC2 external inventory script settings 2 | # 3 | 4 | [ec2] 5 | 6 | # to talk to a private eucalyptus instance uncomment these lines 7 | # and edit eucalyptus_host to be the host name of your cloud controller 8 | #eucalyptus = True 9 | #eucalyptus_host = clc.cloud.domain.org 10 | 11 | # AWS regions to make calls to. Set this to 'all' to make requests to all regions 12 | # in AWS and merge the results together. 
Alternatively, set this to a comma 13 | # separated list of regions. E.g. 'us-east-1,us-west-1,us-west-2' 14 | regions = all 15 | regions_exclude = us-gov-west-1,cn-north-1 16 | 17 | # When generating inventory, Ansible needs to know how to address a server. 18 | # Each EC2 instance has a lot of variables associated with it. Here is the list: 19 | # http://docs.pythonboto.org/en/latest/ref/ec2.html#module-boto.ec2.instance 20 | # Below are 2 variables that are used as the address of a server: 21 | # - destination_variable 22 | # - vpc_destination_variable 23 | 24 | # This is the normal destination variable to use. If you are running Ansible 25 | # from outside EC2, then 'public_dns_name' makes the most sense. If you are 26 | # running Ansible from within EC2, then perhaps you want to use the internal 27 | # address, and should set this to 'private_dns_name'. The key of an EC2 tag 28 | # may optionally be used; however the boto instance variables hold precedence 29 | # in the event of a collision. 30 | destination_variable = public_dns_name 31 | 32 | # This allows you to override the inventory_name with an ec2 variable, instead 33 | # of using the destination_variable above. Addressing (aka ansible_ssh_host) 34 | # will still use destination_variable. Tags should be written as 'tag_TAGNAME'. 35 | #hostname_variable = tag_Name 36 | 37 | # For servers inside a VPC, using DNS names may not make sense. When an instance 38 | # has 'subnet_id' set, this variable is used. If the subnet is public, setting 39 | # this to 'ip_address' will return the public IP address. For instances in a 40 | # private subnet, this should be set to 'private_ip_address', and Ansible must 41 | # be run from within EC2. The key of an EC2 tag may optionally be used; however 42 | # the boto instance variables hold precedence in the event of a collision. 43 | # WARNING: - instances that are in the private vpc, _without_ public ip address 44 | # will not be listed in the inventory until you set: 45 | # vpc_destination_variable = private_ip_address 46 | vpc_destination_variable = ip_address 47 | 48 | # The following two settings allow flexible ansible host naming based on a 49 | # python format string and a comma-separated list of ec2 tags. Note that: 50 | # 51 | # 1) If the tags referenced are not present for some instances, empty strings 52 | # will be substituted in the format string. 53 | # 2) This overrides both destination_variable and vpc_destination_variable. 54 | # 55 | #destination_format = {0}.{1}.example.com 56 | #destination_format_tags = Name,environment 57 | 58 | # To tag instances on EC2 with the resource records that point to them from 59 | # Route53, uncomment and set 'route53' to True. 60 | route53 = False 61 | 62 | # To exclude RDS instances from the inventory, uncomment and set to False. 63 | #rds = False 64 | 65 | # To exclude ElastiCache instances from the inventory, uncomment and set to False. 66 | #elasticache = False 67 | 68 | # Additionally, you can specify the list of zones to exclude looking up in 69 | # 'route53_excluded_zones' as a comma-separated list. 70 | # route53_excluded_zones = samplezone1.com, samplezone2.com 71 | 72 | # By default, only EC2 instances in the 'running' state are returned. Set 73 | # 'all_instances' to True to return all instances regardless of state. 74 | all_instances = False 75 | 76 | # By default, only EC2 instances in the 'running' state are returned. Specify 77 | # EC2 instance states to return as a comma-separated list. 
This 78 | # option is overridden when 'all_instances' is True. 79 | # instance_states = pending, running, shutting-down, terminated, stopping, stopped 80 | 81 | # By default, only RDS instances in the 'available' state are returned. Set 82 | # 'all_rds_instances' to True to return all RDS instances regardless of state. 83 | all_rds_instances = False 84 | 85 | # Include RDS cluster information (Aurora etc.) 86 | include_rds_clusters = False 87 | 88 | # By default, only ElastiCache clusters and nodes in the 'available' state 89 | # are returned. Set 'all_elasticache_clusters' and/or 'all_elasticache_nodes' 90 | # to True to return all ElastiCache clusters and nodes, regardless of state. 91 | # 92 | # Note that all_elasticache_nodes only applies to listed clusters. That means 93 | # if you set all_elasticache_clusters to false, no node will be returned from 94 | # unavailable clusters, regardless of the state and to what you set for 95 | # all_elasticache_nodes. 96 | all_elasticache_replication_groups = False 97 | all_elasticache_clusters = False 98 | all_elasticache_nodes = False 99 | 100 | # API calls to EC2 are slow. For this reason, we cache the results of an API 101 | # call. Set this to the path you want cache files to be written to. Two files 102 | # will be written to this directory: 103 | # - ansible-ec2.cache 104 | # - ansible-ec2.index 105 | cache_path = ~/.ansible/tmp 106 | 107 | # The number of seconds a cache file is considered valid. After this many 108 | # seconds, a new API call will be made, and the cache file will be updated. 109 | # To disable the cache, set this value to 0 110 | cache_max_age = 300 111 | 112 | # Organize groups into a nested/hierarchy instead of a flat namespace. 113 | nested_groups = False 114 | 115 | # Replace - tags when creating groups to avoid issues with ansible 116 | replace_dash_in_groups = True 117 | 118 | # If set to true, any tag of the form "a,b,c" is expanded into a list 119 | # and the results are used to create additional tag_* inventory groups. 120 | expand_csv_tags = False 121 | 122 | # The EC2 inventory output can become very large. To manage its size, 123 | # configure which groups should be created. 124 | group_by_instance_id = True 125 | group_by_region = True 126 | group_by_availability_zone = True 127 | group_by_aws_account = False 128 | group_by_ami_id = True 129 | group_by_instance_type = True 130 | group_by_key_pair = True 131 | group_by_vpc_id = True 132 | group_by_security_group = True 133 | group_by_tag_keys = True 134 | group_by_tag_none = True 135 | group_by_route53_names = True 136 | group_by_rds_engine = True 137 | group_by_rds_parameter_group = True 138 | group_by_elasticache_engine = True 139 | group_by_elasticache_cluster = True 140 | group_by_elasticache_parameter_group = True 141 | group_by_elasticache_replication_group = True 142 | 143 | # If you only want to include hosts that match a certain regular expression 144 | # pattern_include = staging-* 145 | 146 | # If you want to exclude any hosts that match a certain regular expression 147 | # pattern_exclude = staging-* 148 | 149 | # Instance filters can be used to control which instances are retrieved for 150 | # inventory. For the full list of possible filters, please read the EC2 API 151 | # docs: http://docs.aws.amazon.com/AWSEC2/latest/APIReference/ApiReference-query-DescribeInstances.html#query-DescribeInstances-filters 152 | # Filters are key/value pairs separated by '=', to list multiple filters use 153 | # a list separated by commas. See examples below. 
154 | 155 | # Retrieve only instances with (key=value) env=staging tag 156 | # instance_filters = tag:env=staging 157 | 158 | # Retrieve only instances with role=webservers OR role=dbservers tag 159 | # instance_filters = tag:role=webservers,tag:role=dbservers 160 | 161 | # Retrieve only t1.micro instances OR instances with tag env=staging 162 | # instance_filters = instance-type=t1.micro,tag:env=staging 163 | 164 | # You can also use wildcards in filter values. The filter below lists instances 165 | # whose tag Name value matches webservers1* 166 | # (e.g. webservers15, webservers1a, webservers123, etc.) 167 | # instance_filters = tag:Name=webservers1* 168 | 169 | # A boto configuration profile may be used to separate out credentials; 170 | # see http://boto.readthedocs.org/en/latest/boto_config_tut.html 171 | # boto_profile = some-boto-profile-name 172 | 173 | 174 | [credentials] 175 | 176 | # The AWS credentials can optionally be specified here. Credentials specified 177 | # here are ignored if the environment variable AWS_ACCESS_KEY_ID or 178 | # AWS_PROFILE is set, or if the boto_profile property above is set. 179 | # 180 | # Supplying AWS credentials here is not recommended, as it introduces 181 | # non-trivial security concerns. When going down this route, please make sure 182 | # to set access permissions for this file correctly, e.g. handle it the same 183 | # way as you would a private SSH key. 184 | # 185 | # Unlike the boto and AWS configure files, this section does not support 186 | # profiles. 187 | # 188 | # aws_access_key_id = AXXXXXXXXXXXXXX 189 | # aws_secret_access_key = XXXXXXXXXXXXXXXXXXX 190 | # aws_security_token = XXXXXXXXXXXXXXXXXXXXXXXXXXXX 191 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Ansible playbook to deploy distributed technologies 2 | This project is a set of Ansible playbooks that makes it easy to install distributed technologies on [AWS](https://aws.amazon.com/). 3 | 4 | ## Table of Contents 5 | 1. [Supported playbooks](#supported-playbooks) 6 | 2. [Supported commands](#supported-commands) 7 | 3. [Setup](#setup) 8 | * [On your local/remote machine](#on-your-localremote-machine) 9 | * [Using Docker container](#using-docker-container) 10 | 4. [Playbooks](#playbooks) 11 | * [Launch/Terminate EC2 instances on AWS](#ec2) 12 | * [Zookeeper](#zookeeper) 13 | * [Kafka](#kafka) 14 | * [Vowpal Wabbit](#vowpal-wabbit) 15 | 16 | ## Supported playbooks 17 | * EC2 18 | * Zookeeper 19 | * Kafka 20 | 21 | ## Supported Commands 22 | ```bash 23 | ~$ ansible-playbook <playbook>.yml --extra-vars "<key>=<value> <key>=<value>" --tags "<tag1>,<tag2>" 24 | ``` 25 | * The **EC2** playbook is controlled by a YAML file containing variables for the EC2 instances to be acted on. More details [below](#ec2). 26 | * The **Zookeeper**, **Kafka**, and **Vowpal Wabbit** playbooks need their respective cluster tags to be specified, to identify which nodes are in the cluster and need to be acted on. More details [below](#zookeeper). 27 | 28 | ## Setup 29 | ### On your local/remote machine 30 | 1. [Set up Ansible for your system](http://docs.ansible.com/ansible/intro_installation.html) 31 | 2. Create the following folder 32 | 33 | ```bash 34 | ~$ mkdir -p /etc/ansible/hosts 35 | ``` 36 | 3. Clone this repo 37 | 38 | ```bash 39 | ~$ git clone https://github.com/InsightDataScience/ansible-playbook.git 40 | ``` 41 | 42 | 4. Copy the `ec2.py` and `ec2.ini` files in this repo to `/etc/ansible/hosts` 43 | 5.
Update information in `ansible_example.cfg` and move it to `/etc/ansible/ansible.cfg` 44 | 6. Export AWS credentials as environment variables 45 | 46 | ```bash 47 | export AWS_ACCESS_KEY_ID=XXXXXXXXXXXXXX 48 | export AWS_SECRET_ACCESS_KEY=XXXXXXXXXXXXXX 49 | ``` 50 | 51 | ### Using Docker container 52 | 1. [Set up Docker for your system](https://docs.docker.com/engine/installation/) 53 | 2. Clone this repo 54 | 55 | ```bash 56 | ~$ git clone https://github.com/InsightDataScience/ansible-playbook.git 57 | ``` 58 | 3. Build your Docker image locally with the following command; run this from the root folder of this repo 59 | 60 | ```bash 61 | ~$ docker build -t ansible-playbook -f conf/Dockerfile . 62 | ``` 63 | 64 | 4. Run the Docker container in interactive mode using the script in the repo - `run_ansible_playbook_container.sh` 65 | 66 | ```bash 67 | ~$ ./run_ansible_playbook_container.sh 68 | ``` 69 | 5. Update information in the `/etc/ansible/ansible.cfg` config file inside the container 70 | 6. Export AWS credentials in `~/.profile` inside the container 71 | 72 | ```bash 73 | export AWS_ACCESS_KEY_ID=XXXXXXXXXXXXXX 74 | export AWS_SECRET_ACCESS_KEY=XXXXXXXXXXXXXX 75 | ``` 76 | 77 | ## Playbooks 78 | * ###EC2 79 | 80 | Launch/Start/Stop/Terminate EC2 instances on AWS. 81 | 82 | * ####Variable file: 83 | 84 | Update `example_ec2_vars.yml` as per your requirements. 85 | 86 | The EC2 playbook is controlled by a YAML file with variables defined for the EC2 instances. An example variable file - `example_ec2_vars.yml` - is included in this repo. You can define your own YAML file with the following information: 87 | 88 | ```yaml 89 | --- 90 | key_pair: <key_pair_name> 91 | instance_type: <instance_type> 92 | region: <region> 93 | security_group_id: <security_group_id> 94 | num_instances: <num_instances> 95 | subnet_id: <subnet_id> 96 | tag_key_vals: 97 |   Name: <cluster_name> 98 |   <key>: <value> 99 |   <key>: <value> 100 | ``` 101 | 102 | The `Name` tag in the `tag_key_vals` is mandatory to create an identifier for the instances. More tags can be added if needed but are optional. 103 | 104 | In your terminal, you will likely also need to add your private key to an ssh agent: 105 | 106 | ```bash 107 | ssh-add <path-to-private-key> 108 | ``` 109 | 110 | * ####Launch EC2 instances: 111 | 112 | ```bash 113 | ~$ ansible-playbook ./ec2.yml --extra-vars "vars_file=./example_ec2_vars.yml" --tags launch 114 | ``` 115 | * ####Stop EC2 instances: 116 | 117 | ```bash 118 | ~$ ansible-playbook ./ec2.yml --extra-vars "vars_file=./example_ec2_vars.yml" --tags stop 119 | ``` 120 | * ####Start EC2 instances: 121 | 122 | ```bash 123 | ~$ ansible-playbook ./ec2.yml --extra-vars "vars_file=./example_ec2_vars.yml" --tags start 124 | ``` 125 | * ####Terminate EC2 instances: 126 | 127 | ```bash 128 | ~$ ansible-playbook ./ec2.yml --extra-vars "vars_file=./example_ec2_vars.yml" --tags terminate 129 | ``` 130 | 131 | * ###Zookeeper 132 | For the Zookeeper playbook, a `zookeeper_tag` needs to be specified to identify the nodes in the cluster. This `zookeeper_tag` can be any tag specified in `tag_key_vals` in the variable file for [EC2](#ec2) while launching EC2 instances. 133 | 134 | The `zookeeper_tag` is specified as `<key>_<value>` for one of the `tag_key_vals` to be used. For example, if the `Name` value in the [EC2 variable file](example_ec2_vars.yml) mentioned above was `test-cluster`, the `zookeeper_tag` would be specified as `zookeeper_tag=Name_test-cluster`. It doesn't have to be the `Name` tag; it can be any key-value pair in `tag_key_vals`, specified as `zookeeper_tag=<key>_<value>`.
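For intuition, the `<key>_<value>` pair has to line up with an inventory group that `ec2.py` generates from the instance tags. Below is a minimal Python sketch of that derivation; `to_safe()` here is a simplified stand-in for the helper in `ec2.py` (which sanitizes characters that are invalid in group names), not the vendored code itself:

```python
import re

def to_safe(word, replace_dash_in_groups=True):
    # Simplified stand-in for ec2.py's to_safe(): any character that is not
    # a letter, digit, or underscore (including '=' and, when
    # replace_dash_in_groups is True, '-') becomes '_'.
    regex = r"[^A-Za-z0-9_]" if replace_dash_in_groups else r"[^A-Za-z0-9_\-]"
    return re.sub(regex, "_", word)

# An instance tagged Name=test-cluster lands in this inventory group:
print(to_safe("tag_" + "Name" + "=" + "test-cluster"))  # -> tag_Name_test_cluster
```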
135 | 136 | * ####Install Zookeeper: 137 | 138 | ```bash 139 | ~$ ansible-playbook ./zookeeper.yml --extra-vars "zookeeper_tag=<key>_<value>" --tags install 140 | ``` 141 | * ####Start Zookeeper: 142 | 143 | ```bash 144 | ~$ ansible-playbook ./zookeeper.yml --extra-vars "zookeeper_tag=<key>_<value>" --tags start 145 | ``` 146 | * ####Get info about Zookeeper on the specified cluster: 147 | 148 | ```bash 149 | ~$ ansible-playbook ./zookeeper.yml --extra-vars "zookeeper_tag=<key>_<value>" --tags info 150 | ``` 151 | * ####Stop Zookeeper: 152 | 153 | ```bash 154 | ~$ ansible-playbook ./zookeeper.yml --extra-vars "zookeeper_tag=<key>_<value>" --tags stop 155 | ``` 156 | * ####Uninstall Zookeeper: 157 | 158 | ```bash 159 | ~$ ansible-playbook ./zookeeper.yml --extra-vars "zookeeper_tag=<key>_<value>" --tags uninstall 160 | ``` 161 | 162 | * ###Kafka 163 | Kafka has a dependency on Zookeeper for cluster membership, topic configuration, data partitioning, etc. For the Kafka playbook, a `zookeeper_tag` and a `kafka_tag` need to be specified to identify the nodes in the Zookeeper and Kafka clusters respectively. The `kafka_tag` and `zookeeper_tag` can be any tag specified in `tag_key_vals` in the [variable file for EC2](#variable-file). 164 | 165 | The `kafka_tag` and `zookeeper_tag` are specified as `<key>_<value>` for one of the `tag_key_vals` to be used. For example, if the `Name` value in the [EC2 variable file](#variable-file) mentioned above was `test-cluster` and we had the same cluster for Zookeeper and Kafka, the `kafka_tag` and `zookeeper_tag` would be specified as `zookeeper_tag=Name_test-cluster` and `kafka_tag=Name_test-cluster` respectively. Zookeeper and Kafka don't have to be on the same cluster, and it doesn't have to be the `Name` tag; it can be any key-value pair in `tag_key_vals`, specified as `zookeeper_tag=<key>_<value>` and `kafka_tag=<key>_<value>`. 166 | 167 | ####Kafka's dependency on Zookeeper 168 | 169 | Kafka's dependency on Zookeeper is taken care of by the Kafka playbook. If you are trying to set up Kafka on the cluster specified by `kafka_tag`, the playbook will check that Zookeeper is installed on the cluster specified by `zookeeper_tag`; if it is not set up, the playbook will first set up Zookeeper and then Kafka. By default, any operation on the Kafka cluster, like `start`, `install`, etc., will first be executed on the Zookeeper cluster. However, some operations, like `stop` and `uninstall`, should run only on the Kafka cluster and not be executed on the Zookeeper cluster. This can be achieved by passing the flag `--skip-tags zookeeper` when running the Kafka playbook. Examples of this behavior are shown below in the `stop` and `uninstall` operations.
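Before running an operation against a tag, it can help to confirm which hosts the dynamic inventory actually resolves that tag to. Here is a small, hypothetical helper (the script path and group name below are assumptions about your setup, per the setup steps above) that drives the inventory script's `--list` mode:

```python
import json
import subprocess

def hosts_in_group(group, inventory_script="/etc/ansible/hosts/ec2.py"):
    # Ask the dynamic inventory script for its full group -> hosts mapping.
    out = subprocess.check_output([inventory_script, "--list"])
    entry = json.loads(out).get(group, [])
    # Groups are normally plain host lists; with nested_groups enabled they
    # may be dicts carrying a 'hosts' key, so handle both shapes.
    return entry.get("hosts", []) if isinstance(entry, dict) else entry

# e.g. sanity-check a cluster tag before running --tags install:
print(hosts_in_group("tag_Name_test_cluster"))
```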
170 | 171 | 172 | * ####Install Kafka: 173 | 174 | ```bash 175 | ~$ ansible-playbook ./kafka.yml --extra-vars "zookeeper_tag=<key>_<value> kafka_tag=<key>_<value>" --tags install 176 | ``` 177 | * ####Start Kafka: 178 | 179 | ```bash 180 | ~$ ansible-playbook ./kafka.yml --extra-vars "zookeeper_tag=<key>_<value> kafka_tag=<key>_<value>" --tags start 181 | ``` 182 | * ####Get info about Kafka on the specified cluster: 183 | 184 | ```bash 185 | ~$ ansible-playbook ./kafka.yml --extra-vars "zookeeper_tag=<key>_<value> kafka_tag=<key>_<value>" --tags info 186 | ``` 187 | * ####Stop Kafka: 188 | 189 | ```bash 190 | ~$ ansible-playbook ./kafka.yml --extra-vars "zookeeper_tag=<key>_<value> kafka_tag=<key>_<value>" --tags stop --skip-tags zookeeper 191 | ``` 192 | * ####Uninstall Kafka: 193 | 194 | ```bash 195 | ~$ ansible-playbook ./kafka.yml --extra-vars "zookeeper_tag=<key>_<value> kafka_tag=<key>_<value>" --tags uninstall --skip-tags zookeeper 196 | 197 | ``` 198 | 199 | * ###Vowpal Wabbit 200 | 201 | Vowpal Wabbit is a fast out-of-core machine learning system. Installation can take upwards of 10 minutes on micro instances, as it compiles a lot of C++ with high optimization levels using Clang. 202 | 203 | * ####Install Vowpal Wabbit: 204 | 205 | ```bash 206 | ~$ ansible-playbook ./vw.yml --extra-vars "vw_tag=class_vw" --tags install 207 | ``` 208 | 209 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship.
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /ec2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | EC2 external inventory script 5 | ================================= 6 | 7 | Generates inventory that Ansible can understand by making API request to 8 | AWS EC2 using the Boto library. 
9 | 10 | NOTE: This script assumes Ansible is being executed where the environment 11 | variables needed for Boto have already been set: 12 | export AWS_ACCESS_KEY_ID='AK123' 13 | export AWS_SECRET_ACCESS_KEY='abc123' 14 | 15 | This script also assumes there is an ec2.ini file alongside it. To specify a 16 | different path to ec2.ini, define the EC2_INI_PATH environment variable: 17 | 18 | export EC2_INI_PATH=/path/to/my_ec2.ini 19 | 20 | If you're using eucalyptus you need to set the above variables and 21 | you need to define: 22 | 23 | export EC2_URL=http://hostname_of_your_cc:port/services/Eucalyptus 24 | 25 | If you're using boto profiles (requires boto>=2.24.0) you can choose a profile 26 | using the --boto-profile command line argument (e.g. ec2.py --boto-profile prod) or using 27 | the AWS_PROFILE variable: 28 | 29 | AWS_PROFILE=prod ansible-playbook -i ec2.py myplaybook.yml 30 | 31 | For more details, see: http://docs.pythonboto.org/en/latest/boto_config_tut.html 32 | 33 | When run against a specific host, this script returns the following variables: 34 | - ec2_ami_launch_index 35 | - ec2_architecture 36 | - ec2_association 37 | - ec2_attachTime 38 | - ec2_attachment 39 | - ec2_attachmentId 40 | - ec2_block_devices 41 | - ec2_client_token 42 | - ec2_deleteOnTermination 43 | - ec2_description 44 | - ec2_deviceIndex 45 | - ec2_dns_name 46 | - ec2_eventsSet 47 | - ec2_group_name 48 | - ec2_hypervisor 49 | - ec2_id 50 | - ec2_image_id 51 | - ec2_instanceState 52 | - ec2_instance_type 53 | - ec2_ipOwnerId 54 | - ec2_ip_address 55 | - ec2_item 56 | - ec2_kernel 57 | - ec2_key_name 58 | - ec2_launch_time 59 | - ec2_monitored 60 | - ec2_monitoring 61 | - ec2_networkInterfaceId 62 | - ec2_ownerId 63 | - ec2_persistent 64 | - ec2_placement 65 | - ec2_platform 66 | - ec2_previous_state 67 | - ec2_private_dns_name 68 | - ec2_private_ip_address 69 | - ec2_publicIp 70 | - ec2_public_dns_name 71 | - ec2_ramdisk 72 | - ec2_reason 73 | - ec2_region 74 | - ec2_requester_id 75 | - ec2_root_device_name 76 | - ec2_root_device_type 77 | - ec2_security_group_ids 78 | - ec2_security_group_names 79 | - ec2_shutdown_state 80 | - ec2_sourceDestCheck 81 | - ec2_spot_instance_request_id 82 | - ec2_state 83 | - ec2_state_code 84 | - ec2_state_reason 85 | - ec2_status 86 | - ec2_subnet_id 87 | - ec2_tenancy 88 | - ec2_virtualization_type 89 | - ec2_vpc_id 90 | 91 | These variables are pulled out of a boto.ec2.instance object. There is a lack of 92 | consistency with variable spellings (camelCase and underscores) since this 93 | just loops through all variables the object exposes. It is preferred to use the 94 | ones with underscores when multiple exist. 95 | 96 | In addition, if an instance has AWS Tags associated with it, each tag is a new 97 | variable named: 98 | - ec2_tag_[Key] = [Value] 99 | 100 | Security groups are comma-separated in 'ec2_security_group_ids' and 101 | 'ec2_security_group_names'. 102 | ''' 103 | 104 | # (c) 2012, Peter Sankauskas 105 | # 106 | # This file is part of Ansible, 107 | # 108 | # Ansible is free software: you can redistribute it and/or modify 109 | # it under the terms of the GNU General Public License as published by 110 | # the Free Software Foundation, either version 3 of the License, or 111 | # (at your option) any later version. 112 | # 113 | # Ansible is distributed in the hope that it will be useful, 114 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 115 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 116 | # GNU General Public License for more details. 117 | # 118 | # You should have received a copy of the GNU General Public License 119 | # along with Ansible. If not, see . 120 | 121 | ###################################################################### 122 | 123 | import sys 124 | import os 125 | import argparse 126 | import re 127 | from time import time 128 | import boto 129 | from boto import ec2 130 | from boto import rds 131 | from boto import elasticache 132 | from boto import route53 133 | import six 134 | 135 | from ansible.module_utils import ec2 as ec2_utils 136 | 137 | HAS_BOTO3 = False 138 | try: 139 | import boto3 140 | HAS_BOTO3 = True 141 | except ImportError: 142 | pass 143 | 144 | from six.moves import configparser 145 | from collections import defaultdict 146 | 147 | try: 148 | import json 149 | except ImportError: 150 | import simplejson as json 151 | 152 | 153 | class Ec2Inventory(object): 154 | 155 | def _empty_inventory(self): 156 | return {"_meta" : {"hostvars" : {}}} 157 | 158 | def __init__(self): 159 | ''' Main execution path ''' 160 | 161 | # Inventory grouped by instance IDs, tags, security groups, regions, 162 | # and availability zones 163 | self.inventory = self._empty_inventory() 164 | 165 | self.aws_account_id = None 166 | 167 | # Index of hostname (address) to instance ID 168 | self.index = {} 169 | 170 | # Boto profile to use (if any) 171 | self.boto_profile = None 172 | 173 | # AWS credentials. 174 | self.credentials = {} 175 | 176 | # Read settings and parse CLI arguments 177 | self.parse_cli_args() 178 | self.read_settings() 179 | 180 | # Make sure that profile_name is not passed at all if not set 181 | # as pre 2.24 boto will fall over otherwise 182 | if self.boto_profile: 183 | if not hasattr(boto.ec2.EC2Connection, 'profile_name'): 184 | self.fail_with_error("boto version must be >= 2.24 to use profile") 185 | 186 | # Cache 187 | if self.args.refresh_cache: 188 | self.do_api_calls_update_cache() 189 | elif not self.is_cache_valid(): 190 | self.do_api_calls_update_cache() 191 | 192 | # Data to print 193 | if self.args.host: 194 | data_to_print = self.get_host_info() 195 | 196 | elif self.args.list: 197 | # Display list of instances for inventory 198 | if self.inventory == self._empty_inventory(): 199 | data_to_print = self.get_inventory_from_cache() 200 | else: 201 | data_to_print = self.json_format_dict(self.inventory, True) 202 | 203 | print(data_to_print) 204 | 205 | 206 | def is_cache_valid(self): 207 | ''' Determines if the cache files have expired, or if it is still valid ''' 208 | 209 | if os.path.isfile(self.cache_path_cache): 210 | mod_time = os.path.getmtime(self.cache_path_cache) 211 | current_time = time() 212 | if (mod_time + self.cache_max_age) > current_time: 213 | if os.path.isfile(self.cache_path_index): 214 | return True 215 | 216 | return False 217 | 218 | 219 | def read_settings(self): 220 | ''' Reads the settings from the ec2.ini file ''' 221 | if six.PY3: 222 | config = configparser.ConfigParser() 223 | else: 224 | config = configparser.SafeConfigParser() 225 | ec2_default_ini_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'ec2.ini') 226 | ec2_ini_path = os.path.expanduser(os.path.expandvars(os.environ.get('EC2_INI_PATH', ec2_default_ini_path))) 227 | config.read(ec2_ini_path) 228 | 229 | # is eucalyptus? 
230 | self.eucalyptus_host = None 231 | self.eucalyptus = False 232 | if config.has_option('ec2', 'eucalyptus'): 233 | self.eucalyptus = config.getboolean('ec2', 'eucalyptus') 234 | if self.eucalyptus and config.has_option('ec2', 'eucalyptus_host'): 235 | self.eucalyptus_host = config.get('ec2', 'eucalyptus_host') 236 | 237 | # Regions 238 | self.regions = [] 239 | configRegions = config.get('ec2', 'regions') 240 | configRegions_exclude = config.get('ec2', 'regions_exclude') 241 | if (configRegions == 'all'): 242 | if self.eucalyptus_host: 243 | self.regions.append(boto.connect_euca(host=self.eucalyptus_host).region.name, **self.credentials) 244 | else: 245 | for regionInfo in ec2.regions(): 246 | if regionInfo.name not in configRegions_exclude: 247 | self.regions.append(regionInfo.name) 248 | else: 249 | self.regions = configRegions.split(",") 250 | 251 | # Destination addresses 252 | self.destination_variable = config.get('ec2', 'destination_variable') 253 | self.vpc_destination_variable = config.get('ec2', 'vpc_destination_variable') 254 | 255 | if config.has_option('ec2', 'hostname_variable'): 256 | self.hostname_variable = config.get('ec2', 'hostname_variable') 257 | else: 258 | self.hostname_variable = None 259 | 260 | if config.has_option('ec2', 'destination_format') and \ 261 | config.has_option('ec2', 'destination_format_tags'): 262 | self.destination_format = config.get('ec2', 'destination_format') 263 | self.destination_format_tags = config.get('ec2', 'destination_format_tags').split(',') 264 | else: 265 | self.destination_format = None 266 | self.destination_format_tags = None 267 | 268 | # Route53 269 | self.route53_enabled = config.getboolean('ec2', 'route53') 270 | self.route53_excluded_zones = [] 271 | if config.has_option('ec2', 'route53_excluded_zones'): 272 | self.route53_excluded_zones.extend( 273 | config.get('ec2', 'route53_excluded_zones', '').split(',')) 274 | 275 | # Include RDS instances? 276 | self.rds_enabled = True 277 | if config.has_option('ec2', 'rds'): 278 | self.rds_enabled = config.getboolean('ec2', 'rds') 279 | 280 | # Include RDS cluster instances? 281 | if config.has_option('ec2', 'include_rds_clusters'): 282 | self.include_rds_clusters = config.getboolean('ec2', 'include_rds_clusters') 283 | else: 284 | self.include_rds_clusters = False 285 | 286 | # Include ElastiCache instances? 287 | self.elasticache_enabled = True 288 | if config.has_option('ec2', 'elasticache'): 289 | self.elasticache_enabled = config.getboolean('ec2', 'elasticache') 290 | 291 | # Return all EC2 instances? 292 | if config.has_option('ec2', 'all_instances'): 293 | self.all_instances = config.getboolean('ec2', 'all_instances') 294 | else: 295 | self.all_instances = False 296 | 297 | # Instance states to be gathered in inventory. Default is 'running'. 298 | # Setting 'all_instances' to 'yes' overrides this option. 299 | ec2_valid_instance_states = [ 300 | 'pending', 301 | 'running', 302 | 'shutting-down', 303 | 'terminated', 304 | 'stopping', 305 | 'stopped' 306 | ] 307 | self.ec2_instance_states = [] 308 | if self.all_instances: 309 | self.ec2_instance_states = ec2_valid_instance_states 310 | elif config.has_option('ec2', 'instance_states'): 311 | for instance_state in config.get('ec2', 'instance_states').split(','): 312 | instance_state = instance_state.strip() 313 | if instance_state not in ec2_valid_instance_states: 314 | continue 315 | self.ec2_instance_states.append(instance_state) 316 | else: 317 | self.ec2_instance_states = ['running'] 318 | 319 | # Return all RDS instances? 
(if RDS is enabled) 320 | if config.has_option('ec2', 'all_rds_instances') and self.rds_enabled: 321 | self.all_rds_instances = config.getboolean('ec2', 'all_rds_instances') 322 | else: 323 | self.all_rds_instances = False 324 | 325 | # Return all ElastiCache replication groups? (if ElastiCache is enabled) 326 | if config.has_option('ec2', 'all_elasticache_replication_groups') and self.elasticache_enabled: 327 | self.all_elasticache_replication_groups = config.getboolean('ec2', 'all_elasticache_replication_groups') 328 | else: 329 | self.all_elasticache_replication_groups = False 330 | 331 | # Return all ElastiCache clusters? (if ElastiCache is enabled) 332 | if config.has_option('ec2', 'all_elasticache_clusters') and self.elasticache_enabled: 333 | self.all_elasticache_clusters = config.getboolean('ec2', 'all_elasticache_clusters') 334 | else: 335 | self.all_elasticache_clusters = False 336 | 337 | # Return all ElastiCache nodes? (if ElastiCache is enabled) 338 | if config.has_option('ec2', 'all_elasticache_nodes') and self.elasticache_enabled: 339 | self.all_elasticache_nodes = config.getboolean('ec2', 'all_elasticache_nodes') 340 | else: 341 | self.all_elasticache_nodes = False 342 | 343 | # boto configuration profile (prefer CLI argument) 344 | self.boto_profile = self.args.boto_profile 345 | if config.has_option('ec2', 'boto_profile') and not self.boto_profile: 346 | self.boto_profile = config.get('ec2', 'boto_profile') 347 | 348 | # AWS credentials (prefer environment variables) 349 | if not (self.boto_profile or os.environ.get('AWS_ACCESS_KEY_ID') or 350 | os.environ.get('AWS_PROFILE')): 351 | if config.has_option('credentials', 'aws_access_key_id'): 352 | aws_access_key_id = config.get('credentials', 'aws_access_key_id') 353 | else: 354 | aws_access_key_id = None 355 | if config.has_option('credentials', 'aws_secret_access_key'): 356 | aws_secret_access_key = config.get('credentials', 'aws_secret_access_key') 357 | else: 358 | aws_secret_access_key = None 359 | if config.has_option('credentials', 'aws_security_token'): 360 | aws_security_token = config.get('credentials', 'aws_security_token') 361 | else: 362 | aws_security_token = None 363 | if aws_access_key_id: 364 | self.credentials = { 365 | 'aws_access_key_id': aws_access_key_id, 366 | 'aws_secret_access_key': aws_secret_access_key 367 | } 368 | if aws_security_token: 369 | self.credentials['security_token'] = aws_security_token 370 | 371 | # Cache related 372 | cache_dir = os.path.expanduser(config.get('ec2', 'cache_path')) 373 | if self.boto_profile: 374 | cache_dir = os.path.join(cache_dir, 'profile_' + self.boto_profile) 375 | if not os.path.exists(cache_dir): 376 | os.makedirs(cache_dir) 377 | 378 | cache_name = 'ansible-ec2' 379 | aws_profile = lambda: (self.boto_profile or 380 | os.environ.get('AWS_PROFILE') or 381 | os.environ.get('AWS_ACCESS_KEY_ID') or 382 | self.credentials.get('aws_access_key_id', None)) 383 | if aws_profile(): 384 | cache_name = '%s-%s' % (cache_name, aws_profile()) 385 | self.cache_path_cache = cache_dir + "/%s.cache" % cache_name 386 | self.cache_path_index = cache_dir + "/%s.index" % cache_name 387 | self.cache_max_age = config.getint('ec2', 'cache_max_age') 388 | 389 | if config.has_option('ec2', 'expand_csv_tags'): 390 | self.expand_csv_tags = config.getboolean('ec2', 'expand_csv_tags') 391 | else: 392 | self.expand_csv_tags = False 393 | 394 | # Configure nested groups instead of flat namespace. 
395 | if config.has_option('ec2', 'nested_groups'): 396 | self.nested_groups = config.getboolean('ec2', 'nested_groups') 397 | else: 398 | self.nested_groups = False 399 | 400 | # Replace dash or not in group names 401 | if config.has_option('ec2', 'replace_dash_in_groups'): 402 | self.replace_dash_in_groups = config.getboolean('ec2', 'replace_dash_in_groups') 403 | else: 404 | self.replace_dash_in_groups = True 405 | 406 | # Configure which groups should be created. 407 | group_by_options = [ 408 | 'group_by_instance_id', 409 | 'group_by_region', 410 | 'group_by_availability_zone', 411 | 'group_by_ami_id', 412 | 'group_by_instance_type', 413 | 'group_by_key_pair', 414 | 'group_by_vpc_id', 415 | 'group_by_security_group', 416 | 'group_by_tag_keys', 417 | 'group_by_tag_none', 418 | 'group_by_route53_names', 419 | 'group_by_rds_engine', 420 | 'group_by_rds_parameter_group', 421 | 'group_by_elasticache_engine', 422 | 'group_by_elasticache_cluster', 423 | 'group_by_elasticache_parameter_group', 424 | 'group_by_elasticache_replication_group', 425 | 'group_by_aws_account', 426 | ] 427 | for option in group_by_options: 428 | if config.has_option('ec2', option): 429 | setattr(self, option, config.getboolean('ec2', option)) 430 | else: 431 | setattr(self, option, True) 432 | 433 | # Do we need to just include hosts that match a pattern? 434 | try: 435 | pattern_include = config.get('ec2', 'pattern_include') 436 | if pattern_include and len(pattern_include) > 0: 437 | self.pattern_include = re.compile(pattern_include) 438 | else: 439 | self.pattern_include = None 440 | except configparser.NoOptionError: 441 | self.pattern_include = None 442 | 443 | # Do we need to exclude hosts that match a pattern? 444 | try: 445 | pattern_exclude = config.get('ec2', 'pattern_exclude'); 446 | if pattern_exclude and len(pattern_exclude) > 0: 447 | self.pattern_exclude = re.compile(pattern_exclude) 448 | else: 449 | self.pattern_exclude = None 450 | except configparser.NoOptionError: 451 | self.pattern_exclude = None 452 | 453 | # Instance filters (see boto and EC2 API docs). Ignore invalid filters. 
454 | self.ec2_instance_filters = defaultdict(list) 455 | if config.has_option('ec2', 'instance_filters'): 456 | 457 | filters = [f for f in config.get('ec2', 'instance_filters').split(',') if f] 458 | 459 | for instance_filter in filters: 460 | instance_filter = instance_filter.strip() 461 | if not instance_filter or '=' not in instance_filter: 462 | continue 463 | filter_key, filter_value = [x.strip() for x in instance_filter.split('=', 1)] 464 | if not filter_key: 465 | continue 466 | self.ec2_instance_filters[filter_key].append(filter_value) 467 | 468 | def parse_cli_args(self): 469 | ''' Command line argument processing ''' 470 | 471 | parser = argparse.ArgumentParser(description='Produce an Ansible Inventory file based on EC2') 472 | parser.add_argument('--list', action='store_true', default=True, 473 | help='List instances (default: True)') 474 | parser.add_argument('--host', action='store', 475 | help='Get all the variables about a specific instance') 476 | parser.add_argument('--refresh-cache', action='store_true', default=False, 477 | help='Force refresh of cache by making API requests to EC2 (default: False - use cache files)') 478 | parser.add_argument('--profile', '--boto-profile', action='store', dest='boto_profile', 479 | help='Use boto profile for connections to EC2') 480 | self.args = parser.parse_args() 481 | 482 | 483 | def do_api_calls_update_cache(self): 484 | ''' Do API calls to each region, and save data in cache files ''' 485 | 486 | if self.route53_enabled: 487 | self.get_route53_records() 488 | 489 | for region in self.regions: 490 | self.get_instances_by_region(region) 491 | if self.rds_enabled: 492 | self.get_rds_instances_by_region(region) 493 | if self.elasticache_enabled: 494 | self.get_elasticache_clusters_by_region(region) 495 | self.get_elasticache_replication_groups_by_region(region) 496 | if self.include_rds_clusters: 497 | self.include_rds_clusters_by_region(region) 498 | 499 | self.write_to_cache(self.inventory, self.cache_path_cache) 500 | self.write_to_cache(self.index, self.cache_path_index) 501 | 502 | def connect(self, region): 503 | ''' create connection to api server''' 504 | if self.eucalyptus: 505 | conn = boto.connect_euca(host=self.eucalyptus_host, **self.credentials) 506 | conn.APIVersion = '2010-08-31' 507 | else: 508 | conn = self.connect_to_aws(ec2, region) 509 | return conn 510 | 511 | def boto_fix_security_token_in_profile(self, connect_args): 512 | ''' monkey patch for boto issue boto/boto#2100 ''' 513 | profile = 'profile ' + self.boto_profile 514 | if boto.config.has_option(profile, 'aws_security_token'): 515 | connect_args['security_token'] = boto.config.get(profile, 'aws_security_token') 516 | return connect_args 517 | 518 | def connect_to_aws(self, module, region): 519 | connect_args = self.credentials 520 | 521 | # only pass the profile name if it's set (as it is not supported by older boto versions) 522 | if self.boto_profile: 523 | connect_args['profile_name'] = self.boto_profile 524 | self.boto_fix_security_token_in_profile(connect_args) 525 | 526 | conn = module.connect_to_region(region, **connect_args) 527 | # connect_to_region will fail "silently" by returning None if the region name is wrong or not supported 528 | if conn is None: 529 | self.fail_with_error("region name: %s likely not supported, or AWS is down. connection to region failed." 
% region) 530 | return conn 531 | 532 | def get_instances_by_region(self, region): 533 | ''' Makes an AWS EC2 API call to the list of instances in a particular 534 | region ''' 535 | 536 | try: 537 | conn = self.connect(region) 538 | reservations = [] 539 | if self.ec2_instance_filters: 540 | for filter_key, filter_values in self.ec2_instance_filters.items(): 541 | reservations.extend(conn.get_all_instances(filters = { filter_key : filter_values })) 542 | else: 543 | reservations = conn.get_all_instances() 544 | 545 | # Pull the tags back in a second step 546 | # AWS are on record as saying that the tags fetched in the first `get_all_instances` request are not 547 | # reliable and may be missing, and the only way to guarantee they are there is by calling `get_all_tags` 548 | instance_ids = [] 549 | for reservation in reservations: 550 | instance_ids.extend([instance.id for instance in reservation.instances]) 551 | 552 | max_filter_value = 199 553 | tags = [] 554 | for i in range(0, len(instance_ids), max_filter_value): 555 | tags.extend(conn.get_all_tags(filters={'resource-type': 'instance', 'resource-id': instance_ids[i:i+max_filter_value]})) 556 | 557 | tags_by_instance_id = defaultdict(dict) 558 | for tag in tags: 559 | tags_by_instance_id[tag.res_id][tag.name] = tag.value 560 | 561 | if (not self.aws_account_id) and reservations: 562 | self.aws_account_id = reservations[0].owner_id 563 | 564 | for reservation in reservations: 565 | for instance in reservation.instances: 566 | instance.tags = tags_by_instance_id[instance.id] 567 | self.add_instance(instance, region) 568 | 569 | except boto.exception.BotoServerError as e: 570 | if e.error_code == 'AuthFailure': 571 | error = self.get_auth_error_message() 572 | else: 573 | backend = 'Eucalyptus' if self.eucalyptus else 'AWS' 574 | error = "Error connecting to %s backend.\n%s" % (backend, e.message) 575 | self.fail_with_error(error, 'getting EC2 instances') 576 | 577 | def get_rds_instances_by_region(self, region): 578 | ''' Makes an AWS API call to the list of RDS instances in a particular 579 | region ''' 580 | 581 | try: 582 | conn = self.connect_to_aws(rds, region) 583 | if conn: 584 | marker = None 585 | while True: 586 | instances = conn.get_all_dbinstances(marker=marker) 587 | marker = instances.marker 588 | for instance in instances: 589 | self.add_rds_instance(instance, region) 590 | if not marker: 591 | break 592 | except boto.exception.BotoServerError as e: 593 | error = e.reason 594 | 595 | if e.error_code == 'AuthFailure': 596 | error = self.get_auth_error_message() 597 | if not e.reason == "Forbidden": 598 | error = "Looks like AWS RDS is down:\n%s" % e.message 599 | self.fail_with_error(error, 'getting RDS instances') 600 | 601 | def include_rds_clusters_by_region(self, region): 602 | if not HAS_BOTO3: 603 | self.fail_with_error("Working with RDS clusters requires boto3 - please install boto3 and try again", 604 | "getting RDS clusters") 605 | 606 | client = ec2_utils.boto3_inventory_conn('client', 'rds', region, **self.credentials) 607 | 608 | marker, clusters = '', [] 609 | while marker is not None: 610 | resp = client.describe_db_clusters(Marker=marker) 611 | clusters.extend(resp["DBClusters"]) 612 | marker = resp.get('Marker', None) 613 | 614 | account_id = boto.connect_iam().get_user().arn.split(':')[4] 615 | c_dict = {} 616 | for c in clusters: 617 | # remove these datetime objects as there is no serialisation to json 618 | # currently in place and we don't need the data yet 619 | if 'EarliestRestorableTime' in c: 620 
| del c['EarliestRestorableTime'] 621 | if 'LatestRestorableTime' in c: 622 | del c['LatestRestorableTime'] 623 | 624 | if self.ec2_instance_filters == {}: 625 | matches_filter = True 626 | else: 627 | matches_filter = False 628 | 629 | try: 630 | # arn:aws:rds:::: 631 | tags = client.list_tags_for_resource( 632 | ResourceName='arn:aws:rds:' + region + ':' + account_id + ':cluster:' + c['DBClusterIdentifier']) 633 | c['Tags'] = tags['TagList'] 634 | 635 | if self.ec2_instance_filters: 636 | for filter_key, filter_values in self.ec2_instance_filters.items(): 637 | # get AWS tag key e.g. tag:env will be 'env' 638 | tag_name = filter_key.split(":", 1)[1] 639 | # Filter values is a list (if you put multiple values for the same tag name) 640 | matches_filter = any(d['Key'] == tag_name and d['Value'] in filter_values for d in c['Tags']) 641 | 642 | if matches_filter: 643 | # it matches a filter, so stop looking for further matches 644 | break 645 | 646 | except Exception as e: 647 | if e.message.find('DBInstanceNotFound') >= 0: 648 | # AWS RDS bug (2016-01-06) means deletion does not fully complete and leave an 'empty' cluster. 649 | # Ignore errors when trying to find tags for these 650 | pass 651 | 652 | # ignore empty clusters caused by AWS bug 653 | if len(c['DBClusterMembers']) == 0: 654 | continue 655 | elif matches_filter: 656 | c_dict[c['DBClusterIdentifier']] = c 657 | 658 | self.inventory['db_clusters'] = c_dict 659 | 660 | def get_elasticache_clusters_by_region(self, region): 661 | ''' Makes an AWS API call to the list of ElastiCache clusters (with 662 | nodes' info) in a particular region.''' 663 | 664 | # ElastiCache boto module doesn't provide a get_all_intances method, 665 | # that's why we need to call describe directly (it would be called by 666 | # the shorthand method anyway...) 667 | try: 668 | conn = self.connect_to_aws(elasticache, region) 669 | if conn: 670 | # show_cache_node_info = True 671 | # because we also want nodes' information 672 | response = conn.describe_cache_clusters(None, None, None, True) 673 | 674 | except boto.exception.BotoServerError as e: 675 | error = e.reason 676 | 677 | if e.error_code == 'AuthFailure': 678 | error = self.get_auth_error_message() 679 | if not e.reason == "Forbidden": 680 | error = "Looks like AWS ElastiCache is down:\n%s" % e.message 681 | self.fail_with_error(error, 'getting ElastiCache clusters') 682 | 683 | try: 684 | # Boto also doesn't provide wrapper classes to CacheClusters or 685 | # CacheNodes. Because of that we can't make use of the get_list 686 | # method in the AWSQueryConnection. Let's do the work manually 687 | clusters = response['DescribeCacheClustersResponse']['DescribeCacheClustersResult']['CacheClusters'] 688 | 689 | except KeyError as e: 690 | error = "ElastiCache query to AWS failed (unexpected format)." 691 | self.fail_with_error(error, 'getting ElastiCache clusters') 692 | 693 | for cluster in clusters: 694 | self.add_elasticache_cluster(cluster, region) 695 | 696 | def get_elasticache_replication_groups_by_region(self, region): 697 | ''' Makes an AWS API call to the list of ElastiCache replication groups 698 | in a particular region.''' 699 | 700 | # ElastiCache boto module doesn't provide a get_all_intances method, 701 | # that's why we need to call describe directly (it would be called by 702 | # the shorthand method anyway...) 
703 | try: 704 | conn = self.connect_to_aws(elasticache, region) 705 | if conn: 706 | response = conn.describe_replication_groups() 707 | 708 | except boto.exception.BotoServerError as e: 709 | error = e.reason 710 | 711 | if e.error_code == 'AuthFailure': 712 | error = self.get_auth_error_message() 713 | if not e.reason == "Forbidden": 714 | error = "Looks like AWS ElastiCache [Replication Groups] is down:\n%s" % e.message 715 | self.fail_with_error(error, 'getting ElastiCache clusters') 716 | 717 | try: 718 | # Boto also doesn't provide wrapper classes to ReplicationGroups 719 | # Because of that we can't make use of the get_list method in the 720 | # AWSQueryConnection. Let's do the work manually 721 | replication_groups = response['DescribeReplicationGroupsResponse']['DescribeReplicationGroupsResult']['ReplicationGroups'] 722 | 723 | except KeyError as e: 724 | error = "ElastiCache [Replication Groups] query to AWS failed (unexpected format)." 725 | self.fail_with_error(error, 'getting ElastiCache clusters') 726 | 727 | for replication_group in replication_groups: 728 | self.add_elasticache_replication_group(replication_group, region) 729 | 730 | def get_auth_error_message(self): 731 | ''' create an informative error message if there is an issue authenticating''' 732 | errors = ["Authentication error retrieving ec2 inventory."] 733 | if None in [os.environ.get('AWS_ACCESS_KEY_ID'), os.environ.get('AWS_SECRET_ACCESS_KEY')]: 734 | errors.append(' - No AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY environment vars found') 735 | else: 736 | errors.append(' - AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment vars found but may not be correct') 737 | 738 | boto_paths = ['/etc/boto.cfg', '~/.boto', '~/.aws/credentials'] 739 | boto_config_found = list(p for p in boto_paths if os.path.isfile(os.path.expanduser(p))) 740 | if len(boto_config_found) > 0: 741 | errors.append(" - Boto configs found at '%s', but the credentials contained may not be correct" % ', '.join(boto_config_found)) 742 | else: 743 | errors.append(" - No Boto config found at any expected location '%s'" % ', '.join(boto_paths)) 744 | 745 | return '\n'.join(errors) 746 | 747 | def fail_with_error(self, err_msg, err_operation=None): 748 | '''log an error to std err for ansible-playbook to consume and exit''' 749 | if err_operation: 750 | err_msg = 'ERROR: "{err_msg}", while: {err_operation}'.format( 751 | err_msg=err_msg, err_operation=err_operation) 752 | sys.stderr.write(err_msg) 753 | sys.exit(1) 754 | 755 | def get_instance(self, region, instance_id): 756 | conn = self.connect(region) 757 | 758 | reservations = conn.get_all_instances([instance_id]) 759 | for reservation in reservations: 760 | for instance in reservation.instances: 761 | return instance 762 | 763 | def add_instance(self, instance, region): 764 | ''' Adds an instance to the inventory and index, as long as it is 765 | addressable ''' 766 | 767 | # Only return instances with desired instance states 768 | if instance.state not in self.ec2_instance_states: 769 | return 770 | 771 | # Select the best destination address 772 | if self.destination_format and self.destination_format_tags: 773 | dest = self.destination_format.format(*[ getattr(instance, 'tags').get(tag, '') for tag in self.destination_format_tags ]) 774 | elif instance.subnet_id: 775 | dest = getattr(instance, self.vpc_destination_variable, None) 776 | if dest is None: 777 | dest = getattr(instance, 'tags').get(self.vpc_destination_variable, None) 778 | else: 779 | dest = getattr(instance, 
self.destination_variable, None) 780 | if dest is None: 781 | dest = getattr(instance, 'tags').get(self.destination_variable, None) 782 | 783 | if not dest: 784 | # Skip instances we cannot address (e.g. private VPC subnet) 785 | return 786 | 787 | # Set the inventory name 788 | hostname = None 789 | if self.hostname_variable: 790 | if self.hostname_variable.startswith('tag_'): 791 | hostname = instance.tags.get(self.hostname_variable[4:], None) 792 | else: 793 | hostname = getattr(instance, self.hostname_variable) 794 | 795 | # If we can't get a nice hostname, use the destination address 796 | if not hostname: 797 | hostname = dest 798 | else: 799 | hostname = self.to_safe(hostname).lower() 800 | 801 | # if we only want to include hosts that match a pattern, skip those that don't 802 | if self.pattern_include and not self.pattern_include.match(hostname): 803 | return 804 | 805 | # if we need to exclude hosts that match a pattern, skip those 806 | if self.pattern_exclude and self.pattern_exclude.match(hostname): 807 | return 808 | 809 | # Add to index 810 | self.index[hostname] = [region, instance.id] 811 | 812 | # Inventory: Group by instance ID (always a group of 1) 813 | if self.group_by_instance_id: 814 | self.inventory[instance.id] = [hostname] 815 | if self.nested_groups: 816 | self.push_group(self.inventory, 'instances', instance.id) 817 | 818 | # Inventory: Group by region 819 | if self.group_by_region: 820 | self.push(self.inventory, region, hostname) 821 | if self.nested_groups: 822 | self.push_group(self.inventory, 'regions', region) 823 | 824 | # Inventory: Group by availability zone 825 | if self.group_by_availability_zone: 826 | self.push(self.inventory, instance.placement, hostname) 827 | if self.nested_groups: 828 | if self.group_by_region: 829 | self.push_group(self.inventory, region, instance.placement) 830 | self.push_group(self.inventory, 'zones', instance.placement) 831 | 832 | # Inventory: Group by Amazon Machine Image (AMI) ID 833 | if self.group_by_ami_id: 834 | ami_id = self.to_safe(instance.image_id) 835 | self.push(self.inventory, ami_id, hostname) 836 | if self.nested_groups: 837 | self.push_group(self.inventory, 'images', ami_id) 838 | 839 | # Inventory: Group by instance type 840 | if self.group_by_instance_type: 841 | type_name = self.to_safe('type_' + instance.instance_type) 842 | self.push(self.inventory, type_name, hostname) 843 | if self.nested_groups: 844 | self.push_group(self.inventory, 'types', type_name) 845 | 846 | # Inventory: Group by key pair 847 | if self.group_by_key_pair and instance.key_name: 848 | key_name = self.to_safe('key_' + instance.key_name) 849 | self.push(self.inventory, key_name, hostname) 850 | if self.nested_groups: 851 | self.push_group(self.inventory, 'keys', key_name) 852 | 853 | # Inventory: Group by VPC 854 | if self.group_by_vpc_id and instance.vpc_id: 855 | vpc_id_name = self.to_safe('vpc_id_' + instance.vpc_id) 856 | self.push(self.inventory, vpc_id_name, hostname) 857 | if self.nested_groups: 858 | self.push_group(self.inventory, 'vpcs', vpc_id_name) 859 | 860 | # Inventory: Group by security group 861 | if self.group_by_security_group: 862 | try: 863 | for group in instance.groups: 864 | key = self.to_safe("security_group_" + group.name) 865 | self.push(self.inventory, key, hostname) 866 | if self.nested_groups: 867 | self.push_group(self.inventory, 'security_groups', key) 868 | except AttributeError: 869 | self.fail_with_error('\n'.join(['Package boto seems a bit older.', 870 | 'Please upgrade boto >= 2.3.0.'])) 871 | 
872 | # Inventory: Group by AWS account ID 873 | if self.group_by_aws_account: 874 | self.push(self.inventory, self.aws_account_id, dest) 875 | if self.nested_groups: 876 | self.push_group(self.inventory, 'accounts', self.aws_account_id) 877 | 878 | # Inventory: Group by tag keys 879 | if self.group_by_tag_keys: 880 | for k, v in instance.tags.items(): 881 | if self.expand_csv_tags and v and ',' in v: 882 | values = map(lambda x: x.strip(), v.split(',')) 883 | else: 884 | values = [v] 885 | 886 | for v in values: 887 | if v: 888 | key = self.to_safe("tag_" + k + "=" + v) 889 | else: 890 | key = self.to_safe("tag_" + k) 891 | self.push(self.inventory, key, hostname) 892 | if self.nested_groups: 893 | self.push_group(self.inventory, 'tags', self.to_safe("tag_" + k)) 894 | if v: 895 | self.push_group(self.inventory, self.to_safe("tag_" + k), key) 896 | 897 | # Inventory: Group by Route53 domain names if enabled 898 | if self.route53_enabled and self.group_by_route53_names: 899 | route53_names = self.get_instance_route53_names(instance) 900 | for name in route53_names: 901 | self.push(self.inventory, name, hostname) 902 | if self.nested_groups: 903 | self.push_group(self.inventory, 'route53', name) 904 | 905 | # Global Tag: instances without tags 906 | if self.group_by_tag_none and len(instance.tags) == 0: 907 | self.push(self.inventory, 'tag_none', hostname) 908 | if self.nested_groups: 909 | self.push_group(self.inventory, 'tags', 'tag_none') 910 | 911 | # Global Tag: tag all EC2 instances 912 | self.push(self.inventory, 'ec2', hostname) 913 | 914 | self.inventory["_meta"]["hostvars"][hostname] = self.get_host_info_dict_from_instance(instance) 915 | self.inventory["_meta"]["hostvars"][hostname]['ansible_ssh_host'] = dest 916 | 917 | 918 | def add_rds_instance(self, instance, region): 919 | ''' Adds an RDS instance to the inventory and index, as long as it is 920 | addressable ''' 921 | 922 | # Only want available instances unless all_rds_instances is True 923 | if not self.all_rds_instances and instance.status != 'available': 924 | return 925 | 926 | # Select the best destination address 927 | dest = instance.endpoint[0] 928 | 929 | if not dest: 930 | # Skip instances we cannot address (e.g. 
private VPC subnet) 931 | return 932 | 933 | # Set the inventory name 934 | hostname = None 935 | if self.hostname_variable: 936 | if self.hostname_variable.startswith('tag_'): 937 | hostname = instance.tags.get(self.hostname_variable[4:], None) 938 | else: 939 | hostname = getattr(instance, self.hostname_variable) 940 | 941 | # If we can't get a nice hostname, use the destination address 942 | if not hostname: 943 | hostname = dest 944 | 945 | hostname = self.to_safe(hostname).lower() 946 | 947 | # Add to index 948 | self.index[hostname] = [region, instance.id] 949 | 950 | # Inventory: Group by instance ID (always a group of 1) 951 | if self.group_by_instance_id: 952 | self.inventory[instance.id] = [hostname] 953 | if self.nested_groups: 954 | self.push_group(self.inventory, 'instances', instance.id) 955 | 956 | # Inventory: Group by region 957 | if self.group_by_region: 958 | self.push(self.inventory, region, hostname) 959 | if self.nested_groups: 960 | self.push_group(self.inventory, 'regions', region) 961 | 962 | # Inventory: Group by availability zone 963 | if self.group_by_availability_zone: 964 | self.push(self.inventory, instance.availability_zone, hostname) 965 | if self.nested_groups: 966 | if self.group_by_region: 967 | self.push_group(self.inventory, region, instance.availability_zone) 968 | self.push_group(self.inventory, 'zones', instance.availability_zone) 969 | 970 | # Inventory: Group by instance type 971 | if self.group_by_instance_type: 972 | type_name = self.to_safe('type_' + instance.instance_class) 973 | self.push(self.inventory, type_name, hostname) 974 | if self.nested_groups: 975 | self.push_group(self.inventory, 'types', type_name) 976 | 977 | # Inventory: Group by VPC 978 | if self.group_by_vpc_id and instance.subnet_group and instance.subnet_group.vpc_id: 979 | vpc_id_name = self.to_safe('vpc_id_' + instance.subnet_group.vpc_id) 980 | self.push(self.inventory, vpc_id_name, hostname) 981 | if self.nested_groups: 982 | self.push_group(self.inventory, 'vpcs', vpc_id_name) 983 | 984 | # Inventory: Group by security group 985 | if self.group_by_security_group: 986 | try: 987 | if instance.security_group: 988 | key = self.to_safe("security_group_" + instance.security_group.name) 989 | self.push(self.inventory, key, hostname) 990 | if self.nested_groups: 991 | self.push_group(self.inventory, 'security_groups', key) 992 | 993 | except AttributeError: 994 | self.fail_with_error('\n'.join(['Package boto seems a bit older.', 995 | 'Please upgrade boto >= 2.3.0.'])) 996 | 997 | 998 | # Inventory: Group by engine 999 | if self.group_by_rds_engine: 1000 | self.push(self.inventory, self.to_safe("rds_" + instance.engine), hostname) 1001 | if self.nested_groups: 1002 | self.push_group(self.inventory, 'rds_engines', self.to_safe("rds_" + instance.engine)) 1003 | 1004 | # Inventory: Group by parameter group 1005 | if self.group_by_rds_parameter_group: 1006 | self.push(self.inventory, self.to_safe("rds_parameter_group_" + instance.parameter_group.name), hostname) 1007 | if self.nested_groups: 1008 | self.push_group(self.inventory, 'rds_parameter_groups', self.to_safe("rds_parameter_group_" + instance.parameter_group.name)) 1009 | 1010 | # Global Tag: all RDS instances 1011 | self.push(self.inventory, 'rds', hostname) 1012 | 1013 | self.inventory["_meta"]["hostvars"][hostname] = self.get_host_info_dict_from_instance(instance) 1014 | self.inventory["_meta"]["hostvars"][hostname]['ansible_ssh_host'] = dest 1015 | 1016 | def add_elasticache_cluster(self, cluster, region): 1017 | ''' 
Adds an ElastiCache cluster to the inventory and index, as long as
1018 |             its nodes are addressable '''
1019 | 
1020 |         # Only want available clusters unless all_elasticache_clusters is True
1021 |         if not self.all_elasticache_clusters and cluster['CacheClusterStatus'] != 'available':
1022 |             return
1023 | 
1024 |         # Select the best destination address
1025 |         if 'ConfigurationEndpoint' in cluster and cluster['ConfigurationEndpoint']:
1026 |             # Memcached cluster
1027 |             dest = cluster['ConfigurationEndpoint']['Address']
1028 |             is_redis = False
1029 |         else:
1030 |             # Redis single-node cluster
1031 |             # Because all Redis clusters are single nodes, we'll merge the
1032 |             # info from the cluster with info about the node
1033 |             dest = cluster['CacheNodes'][0]['Endpoint']['Address']
1034 |             is_redis = True
1035 | 
1036 |         if not dest:
1037 |             # Skip clusters we cannot address (e.g. private VPC subnet)
1038 |             return
1039 | 
1040 |         # Add to index
1041 |         self.index[dest] = [region, cluster['CacheClusterId']]
1042 | 
1043 |         # Inventory: Group by instance ID (always a group of 1)
1044 |         if self.group_by_instance_id:
1045 |             self.inventory[cluster['CacheClusterId']] = [dest]
1046 |             if self.nested_groups:
1047 |                 self.push_group(self.inventory, 'instances', cluster['CacheClusterId'])
1048 | 
1049 |         # Inventory: Group by region
1050 |         if self.group_by_region and not is_redis:
1051 |             self.push(self.inventory, region, dest)
1052 |             if self.nested_groups:
1053 |                 self.push_group(self.inventory, 'regions', region)
1054 | 
1055 |         # Inventory: Group by availability zone
1056 |         if self.group_by_availability_zone and not is_redis:
1057 |             self.push(self.inventory, cluster['PreferredAvailabilityZone'], dest)
1058 |             if self.nested_groups:
1059 |                 if self.group_by_region:
1060 |                     self.push_group(self.inventory, region, cluster['PreferredAvailabilityZone'])
1061 |                 self.push_group(self.inventory, 'zones', cluster['PreferredAvailabilityZone'])
1062 | 
1063 |         # Inventory: Group by node type
1064 |         if self.group_by_instance_type and not is_redis:
1065 |             type_name = self.to_safe('type_' + cluster['CacheNodeType'])
1066 |             self.push(self.inventory, type_name, dest)
1067 |             if self.nested_groups:
1068 |                 self.push_group(self.inventory, 'types', type_name)
1069 | 
1070 |         # Inventory: Group by VPC (information not available in the current
1071 |         # AWS API version for ElastiCache)
1072 | 
1073 |         # Inventory: Group by security group
1074 |         if self.group_by_security_group and not is_redis:
1075 | 
1076 |             # Check for the existence of the 'SecurityGroups' key and also if
1077 |             # this key has some value. When the cluster is not placed in a SG
1078 |             # the query can return None here and cause an error.
1079 |             if 'SecurityGroups' in cluster and cluster['SecurityGroups'] is not None:
1080 |                 for security_group in cluster['SecurityGroups']:
1081 |                     key = self.to_safe("security_group_" + security_group['SecurityGroupId'])
1082 |                     self.push(self.inventory, key, dest)
1083 |                     if self.nested_groups:
1084 |                         self.push_group(self.inventory, 'security_groups', key)
1085 | 
1086 |         # Inventory: Group by engine
1087 |         if self.group_by_elasticache_engine and not is_redis:
1088 |             self.push(self.inventory, self.to_safe("elasticache_" + cluster['Engine']), dest)
1089 |             if self.nested_groups:
1090 |                 self.push_group(self.inventory, 'elasticache_engines', self.to_safe("elasticache_" + cluster['Engine']))
1091 | 
1092 |         # Inventory: Group by parameter group
1093 |         if self.group_by_elasticache_parameter_group:
1094 |             self.push(self.inventory, self.to_safe("elasticache_parameter_group_" + cluster['CacheParameterGroup']['CacheParameterGroupName']), dest)
1095 |             if self.nested_groups:
1096 |                 self.push_group(self.inventory, 'elasticache_parameter_groups', self.to_safe(cluster['CacheParameterGroup']['CacheParameterGroupName']))
1097 | 
1098 |         # Inventory: Group by replication group
1099 |         if self.group_by_elasticache_replication_group and 'ReplicationGroupId' in cluster and cluster['ReplicationGroupId']:
1100 |             self.push(self.inventory, self.to_safe("elasticache_replication_group_" + cluster['ReplicationGroupId']), dest)
1101 |             if self.nested_groups:
1102 |                 self.push_group(self.inventory, 'elasticache_replication_groups', self.to_safe(cluster['ReplicationGroupId']))
1103 | 
1104 |         # Global Tag: all ElastiCache clusters
1105 |         self.push(self.inventory, 'elasticache_clusters', cluster['CacheClusterId'])
1106 | 
1107 |         host_info = self.get_host_info_dict_from_describe_dict(cluster)
1108 | 
1109 |         self.inventory["_meta"]["hostvars"][dest] = host_info
1110 | 
1111 |         # Add the nodes
1112 |         for node in cluster['CacheNodes']:
1113 |             self.add_elasticache_node(node, cluster, region)
1114 | 
1115 |     def add_elasticache_node(self, node, cluster, region):
1116 |         ''' Adds an ElastiCache node to the inventory and index, as long as
1117 |             it is addressable '''
1118 | 
1119 |         # Only want available nodes unless all_elasticache_nodes is True
1120 |         if not self.all_elasticache_nodes and node['CacheNodeStatus'] != 'available':
1121 |             return
1122 | 
1123 |         # Select the best destination address
1124 |         dest = node['Endpoint']['Address']
1125 | 
1126 |         if not dest:
1127 |             # Skip nodes we cannot address (e.g.
private VPC subnet) 1128 | return 1129 | 1130 | node_id = self.to_safe(cluster['CacheClusterId'] + '_' + node['CacheNodeId']) 1131 | 1132 | # Add to index 1133 | self.index[dest] = [region, node_id] 1134 | 1135 | # Inventory: Group by node ID (always a group of 1) 1136 | if self.group_by_instance_id: 1137 | self.inventory[node_id] = [dest] 1138 | if self.nested_groups: 1139 | self.push_group(self.inventory, 'instances', node_id) 1140 | 1141 | # Inventory: Group by region 1142 | if self.group_by_region: 1143 | self.push(self.inventory, region, dest) 1144 | if self.nested_groups: 1145 | self.push_group(self.inventory, 'regions', region) 1146 | 1147 | # Inventory: Group by availability zone 1148 | if self.group_by_availability_zone: 1149 | self.push(self.inventory, cluster['PreferredAvailabilityZone'], dest) 1150 | if self.nested_groups: 1151 | if self.group_by_region: 1152 | self.push_group(self.inventory, region, cluster['PreferredAvailabilityZone']) 1153 | self.push_group(self.inventory, 'zones', cluster['PreferredAvailabilityZone']) 1154 | 1155 | # Inventory: Group by node type 1156 | if self.group_by_instance_type: 1157 | type_name = self.to_safe('type_' + cluster['CacheNodeType']) 1158 | self.push(self.inventory, type_name, dest) 1159 | if self.nested_groups: 1160 | self.push_group(self.inventory, 'types', type_name) 1161 | 1162 | # Inventory: Group by VPC (information not available in the current 1163 | # AWS API version for ElastiCache) 1164 | 1165 | # Inventory: Group by security group 1166 | if self.group_by_security_group: 1167 | 1168 | # Check for the existence of the 'SecurityGroups' key and also if 1169 | # this key has some value. When the cluster is not placed in a SG 1170 | # the query can return None here and cause an error. 1171 | if 'SecurityGroups' in cluster and cluster['SecurityGroups'] is not None: 1172 | for security_group in cluster['SecurityGroups']: 1173 | key = self.to_safe("security_group_" + security_group['SecurityGroupId']) 1174 | self.push(self.inventory, key, dest) 1175 | if self.nested_groups: 1176 | self.push_group(self.inventory, 'security_groups', key) 1177 | 1178 | # Inventory: Group by engine 1179 | if self.group_by_elasticache_engine: 1180 | self.push(self.inventory, self.to_safe("elasticache_" + cluster['Engine']), dest) 1181 | if self.nested_groups: 1182 | self.push_group(self.inventory, 'elasticache_engines', self.to_safe("elasticache_" + cluster['Engine'])) 1183 | 1184 | # Inventory: Group by parameter group (done at cluster level) 1185 | 1186 | # Inventory: Group by replication group (done at cluster level) 1187 | 1188 | # Inventory: Group by ElastiCache Cluster 1189 | if self.group_by_elasticache_cluster: 1190 | self.push(self.inventory, self.to_safe("elasticache_cluster_" + cluster['CacheClusterId']), dest) 1191 | 1192 | # Global Tag: all ElastiCache nodes 1193 | self.push(self.inventory, 'elasticache_nodes', dest) 1194 | 1195 | host_info = self.get_host_info_dict_from_describe_dict(node) 1196 | 1197 | if dest in self.inventory["_meta"]["hostvars"]: 1198 | self.inventory["_meta"]["hostvars"][dest].update(host_info) 1199 | else: 1200 | self.inventory["_meta"]["hostvars"][dest] = host_info 1201 | 1202 | def add_elasticache_replication_group(self, replication_group, region): 1203 | ''' Adds an ElastiCache replication group to the inventory and index ''' 1204 | 1205 | # Only want available clusters unless all_elasticache_replication_groups is True 1206 | if not self.all_elasticache_replication_groups and replication_group['Status'] != 'available': 
1207 |             return
1208 | 
1209 |         # Select the best destination address (PrimaryEndpoint)
1210 |         dest = replication_group['NodeGroups'][0]['PrimaryEndpoint']['Address']
1211 | 
1212 |         if not dest:
1213 |             # Skip clusters we cannot address (e.g. private VPC subnet)
1214 |             return
1215 | 
1216 |         # Add to index
1217 |         self.index[dest] = [region, replication_group['ReplicationGroupId']]
1218 | 
1219 |         # Inventory: Group by ID (always a group of 1)
1220 |         if self.group_by_instance_id:
1221 |             self.inventory[replication_group['ReplicationGroupId']] = [dest]
1222 |             if self.nested_groups:
1223 |                 self.push_group(self.inventory, 'instances', replication_group['ReplicationGroupId'])
1224 | 
1225 |         # Inventory: Group by region
1226 |         if self.group_by_region:
1227 |             self.push(self.inventory, region, dest)
1228 |             if self.nested_groups:
1229 |                 self.push_group(self.inventory, 'regions', region)
1230 | 
1231 |         # Inventory: Group by availability zone (doesn't apply to replication groups)
1232 | 
1233 |         # Inventory: Group by node type (doesn't apply to replication groups)
1234 | 
1235 |         # Inventory: Group by VPC (information not available in the current
1236 |         # AWS API version for replication groups)
1237 | 
1238 |         # Inventory: Group by security group (doesn't apply to replication groups)
1239 |         # Check this value at the cluster level
1240 | 
1241 |         # Inventory: Group by engine (replication groups are always Redis)
1242 |         if self.group_by_elasticache_engine:
1243 |             self.push(self.inventory, 'elasticache_redis', dest)
1244 |             if self.nested_groups:
1245 |                 self.push_group(self.inventory, 'elasticache_engines', 'elasticache_redis')
1246 | 
1247 |         # Global Tag: all ElastiCache replication groups
1248 |         self.push(self.inventory, 'elasticache_replication_groups', replication_group['ReplicationGroupId'])
1249 | 
1250 |         host_info = self.get_host_info_dict_from_describe_dict(replication_group)
1251 | 
1252 |         self.inventory["_meta"]["hostvars"][dest] = host_info
1253 | 
1254 |     def get_route53_records(self):
1255 |         ''' Get and store the map of resource records to domain names that
1256 |             point to them. '''
1257 | 
1258 |         r53_conn = route53.Route53Connection()
1259 |         all_zones = r53_conn.get_zones()
1260 | 
1261 |         route53_zones = [ zone for zone in all_zones if zone.name[:-1]
1262 |                           not in self.route53_excluded_zones ]
1263 | 
1264 |         self.route53_records = {}
1265 | 
1266 |         for zone in route53_zones:
1267 |             rrsets = r53_conn.get_all_rrsets(zone.id)
1268 | 
1269 |             for record_set in rrsets:
1270 |                 record_name = record_set.name
1271 | 
1272 |                 if record_name.endswith('.'):
1273 |                     record_name = record_name[:-1]
1274 | 
1275 |                 for resource in record_set.resource_records:
1276 |                     self.route53_records.setdefault(resource, set())
1277 |                     self.route53_records[resource].add(record_name)
1278 | 
1279 | 
1280 |     def get_instance_route53_names(self, instance):
1281 |         ''' Check if an instance is referenced in the records we have from
1282 |             Route53. If it is, return the list of domain names pointing to said
1283 |             instance. If nothing points to it, return an empty list.
''' 1284 | 1285 | instance_attributes = [ 'public_dns_name', 'private_dns_name', 1286 | 'ip_address', 'private_ip_address' ] 1287 | 1288 | name_list = set() 1289 | 1290 | for attrib in instance_attributes: 1291 | try: 1292 | value = getattr(instance, attrib) 1293 | except AttributeError: 1294 | continue 1295 | 1296 | if value in self.route53_records: 1297 | name_list.update(self.route53_records[value]) 1298 | 1299 | return list(name_list) 1300 | 1301 | def get_host_info_dict_from_instance(self, instance): 1302 | instance_vars = {} 1303 | for key in vars(instance): 1304 | value = getattr(instance, key) 1305 | key = self.to_safe('ec2_' + key) 1306 | 1307 | # Handle complex types 1308 | # state/previous_state changed to properties in boto in https://github.com/boto/boto/commit/a23c379837f698212252720d2af8dec0325c9518 1309 | if key == 'ec2__state': 1310 | instance_vars['ec2_state'] = instance.state or '' 1311 | instance_vars['ec2_state_code'] = instance.state_code 1312 | elif key == 'ec2__previous_state': 1313 | instance_vars['ec2_previous_state'] = instance.previous_state or '' 1314 | instance_vars['ec2_previous_state_code'] = instance.previous_state_code 1315 | elif type(value) in [int, bool]: 1316 | instance_vars[key] = value 1317 | elif isinstance(value, six.string_types): 1318 | instance_vars[key] = value.strip() 1319 | elif type(value) == type(None): 1320 | instance_vars[key] = '' 1321 | elif key == 'ec2_region': 1322 | instance_vars[key] = value.name 1323 | elif key == 'ec2__placement': 1324 | instance_vars['ec2_placement'] = value.zone 1325 | elif key == 'ec2_tags': 1326 | for k, v in value.items(): 1327 | if self.expand_csv_tags and ',' in v: 1328 | v = list(map(lambda x: x.strip(), v.split(','))) 1329 | key = self.to_safe('ec2_tag_' + k) 1330 | instance_vars[key] = v 1331 | elif key == 'ec2_groups': 1332 | group_ids = [] 1333 | group_names = [] 1334 | for group in value: 1335 | group_ids.append(group.id) 1336 | group_names.append(group.name) 1337 | instance_vars["ec2_security_group_ids"] = ','.join([str(i) for i in group_ids]) 1338 | instance_vars["ec2_security_group_names"] = ','.join([str(i) for i in group_names]) 1339 | elif key == 'ec2_block_device_mapping': 1340 | instance_vars["ec2_block_devices"] = {} 1341 | for k, v in value.items(): 1342 | instance_vars["ec2_block_devices"][ os.path.basename(k) ] = v.volume_id 1343 | else: 1344 | pass 1345 | # TODO Product codes if someone finds them useful 1346 | #print key 1347 | #print type(value) 1348 | #print value 1349 | 1350 | instance_vars[self.to_safe('ec2_account_id')] = self.aws_account_id 1351 | 1352 | return instance_vars 1353 | 1354 | def get_host_info_dict_from_describe_dict(self, describe_dict): 1355 | ''' Parses the dictionary returned by the API call into a flat list 1356 | of parameters. This method should be used only when 'describe' is 1357 | used directly because Boto doesn't provide specific classes. ''' 1358 | 1359 | # I really don't agree with prefixing everything with 'ec2' 1360 | # because EC2, RDS and ElastiCache are different services. 1361 | # I'm just following the pattern used until now to not break any 1362 | # compatibility. 
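        # A quick sketch of the key normalization applied below (sample keys as
        # returned by ElastiCache describe calls; the mapping follows from
        # uncammelize() + to_safe(), both defined near the bottom of this file):
        #
        #   'CacheNodeType'             -> 'ec2_cache_node_type'
        #   'CacheClusterId'            -> 'ec2_cache_cluster_id'
        #   'PreferredAvailabilityZone' -> 'ec2_preferred_availability_zone'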
1363 | 
1364 |         host_info = {}
1365 |         for key in describe_dict:
1366 |             value = describe_dict[key]
1367 |             key = self.to_safe('ec2_' + self.uncammelize(key))
1368 | 
1369 |             # Handle complex types
1370 | 
1371 |             # Target: Memcached Cache Clusters
1372 |             if key == 'ec2_configuration_endpoint' and value:
1373 |                 host_info['ec2_configuration_endpoint_address'] = value['Address']
1374 |                 host_info['ec2_configuration_endpoint_port'] = value['Port']
1375 | 
1376 |             # Target: Cache Nodes and Redis Cache Clusters (single node)
1377 |             if key == 'ec2_endpoint' and value:
1378 |                 host_info['ec2_endpoint_address'] = value['Address']
1379 |                 host_info['ec2_endpoint_port'] = value['Port']
1380 | 
1381 |             # Target: Redis Replication Groups
1382 |             if key == 'ec2_node_groups' and value:
1383 |                 host_info['ec2_endpoint_address'] = value[0]['PrimaryEndpoint']['Address']
1384 |                 host_info['ec2_endpoint_port'] = value[0]['PrimaryEndpoint']['Port']
1385 |                 replica_count = 0
1386 |                 for node in value[0]['NodeGroupMembers']:
1387 |                     if node['CurrentRole'] == 'primary':
1388 |                         host_info['ec2_primary_cluster_address'] = node['ReadEndpoint']['Address']
1389 |                         host_info['ec2_primary_cluster_port'] = node['ReadEndpoint']['Port']
1390 |                         host_info['ec2_primary_cluster_id'] = node['CacheClusterId']
1391 |                     elif node['CurrentRole'] == 'replica':
1392 |                         host_info['ec2_replica_cluster_address_' + str(replica_count)] = node['ReadEndpoint']['Address']
1393 |                         host_info['ec2_replica_cluster_port_' + str(replica_count)] = node['ReadEndpoint']['Port']
1394 |                         host_info['ec2_replica_cluster_id_' + str(replica_count)] = node['CacheClusterId']
1395 |                         replica_count += 1
1396 | 
1397 |             # Target: Redis Replication Groups
1398 |             if key == 'ec2_member_clusters' and value:
1399 |                 host_info['ec2_member_clusters'] = ','.join([str(i) for i in value])
1400 | 
1401 |             # Target: All Cache Clusters
1402 |             elif key == 'ec2_cache_parameter_group':
1403 |                 host_info["ec2_cache_node_ids_to_reboot"] = ','.join([str(i) for i in value['CacheNodeIdsToReboot']])
1404 |                 host_info['ec2_cache_parameter_group_name'] = value['CacheParameterGroupName']
1405 |                 host_info['ec2_cache_parameter_apply_status'] = value['ParameterApplyStatus']
1406 | 
1407 |             # Target: Almost everything
1408 |             elif key == 'ec2_security_groups':
1409 | 
1410 |                 # Skip if SecurityGroups is None
1411 |                 # (it is possible to have the key defined but no value in it).
1412 | if value is not None: 1413 | sg_ids = [] 1414 | for sg in value: 1415 | sg_ids.append(sg['SecurityGroupId']) 1416 | host_info["ec2_security_group_ids"] = ','.join([str(i) for i in sg_ids]) 1417 | 1418 | # Target: Everything 1419 | # Preserve booleans and integers 1420 | elif type(value) in [int, bool]: 1421 | host_info[key] = value 1422 | 1423 | # Target: Everything 1424 | # Sanitize string values 1425 | elif isinstance(value, six.string_types): 1426 | host_info[key] = value.strip() 1427 | 1428 | # Target: Everything 1429 | # Replace None by an empty string 1430 | elif type(value) == type(None): 1431 | host_info[key] = '' 1432 | 1433 | else: 1434 | # Remove non-processed complex types 1435 | pass 1436 | 1437 | return host_info 1438 | 1439 | def get_host_info(self): 1440 | ''' Get variables about a specific host ''' 1441 | 1442 | if len(self.index) == 0: 1443 | # Need to load index from cache 1444 | self.load_index_from_cache() 1445 | 1446 | if not self.args.host in self.index: 1447 | # try updating the cache 1448 | self.do_api_calls_update_cache() 1449 | if not self.args.host in self.index: 1450 | # host might not exist anymore 1451 | return self.json_format_dict({}, True) 1452 | 1453 | (region, instance_id) = self.index[self.args.host] 1454 | 1455 | instance = self.get_instance(region, instance_id) 1456 | return self.json_format_dict(self.get_host_info_dict_from_instance(instance), True) 1457 | 1458 | def push(self, my_dict, key, element): 1459 | ''' Push an element onto an array that may not have been defined in 1460 | the dict ''' 1461 | group_info = my_dict.setdefault(key, []) 1462 | if isinstance(group_info, dict): 1463 | host_list = group_info.setdefault('hosts', []) 1464 | host_list.append(element) 1465 | else: 1466 | group_info.append(element) 1467 | 1468 | def push_group(self, my_dict, key, element): 1469 | ''' Push a group as a child of another group. 
'''
1470 |         parent_group = my_dict.setdefault(key, {})
1471 |         if not isinstance(parent_group, dict):
1472 |             parent_group = my_dict[key] = {'hosts': parent_group}
1473 |         child_groups = parent_group.setdefault('children', [])
1474 |         if element not in child_groups:
1475 |             child_groups.append(element)
1476 | 
1477 |     def get_inventory_from_cache(self):
1478 |         ''' Reads the inventory from the cache file and returns it as a JSON
1479 |             object '''
1480 | 
1481 |         with open(self.cache_path_cache, 'r') as cache:
1482 |             json_inventory = cache.read()
1483 |         return json_inventory
1484 | 
1485 | 
1486 |     def load_index_from_cache(self):
1487 |         ''' Reads the index from the cache file and sets self.index '''
1488 | 
1489 |         with open(self.cache_path_index, 'r') as cache:
1490 |             json_index = cache.read()
1491 |         self.index = json.loads(json_index)
1492 | 
1493 | 
1494 |     def write_to_cache(self, data, filename):
1495 |         ''' Writes data in JSON format to a file '''
1496 | 
1497 |         json_data = self.json_format_dict(data, True)
1498 |         with open(filename, 'w') as cache:
1499 |             cache.write(json_data)
1500 | 
1501 | 
1502 |     def uncammelize(self, key):
1503 |         temp = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', key)
1504 |         return re.sub('([a-z0-9])([A-Z])', r'\1_\2', temp).lower()
1505 | 
1506 |     def to_safe(self, word):
1507 |         ''' Converts 'bad' characters in a string to underscores so they can be used as Ansible groups '''
1508 |         regex = r"[^A-Za-z0-9_"
1509 |         if not self.replace_dash_in_groups:
1510 |             regex += r"\-"
1511 |         return re.sub(regex + "]", "_", word)
1512 | 
1513 |     def json_format_dict(self, data, pretty=False):
1514 |         ''' Converts a dict to a JSON object and dumps it as a formatted
1515 |             string '''
1516 | 
1517 |         if pretty:
1518 |             return json.dumps(data, sort_keys=True, indent=2)
1519 |         else:
1520 |             return json.dumps(data)
1521 | 
1522 | 
1523 | # Run the script
1524 | Ec2Inventory()
--------------------------------------------------------------------------------
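# Usage sketch (illustrative, based on the CLI flags this script defines):
# ec2.py is a dynamic inventory source, so Ansible executes it rather than
# parsing it. Run directly, it emits JSON:
#
#   ./ec2.py --list             # full inventory, grouped as built above
#   ./ec2.py --host <hostname>  # hostvars for one host (served via the index)
#   ./ec2.py --refresh-cache    # force fresh API calls and rewrite the cache files
#
# Pointed at a playbook, the same script stands in for a static inventory file,
# e.g. ansible-playbook -i ec2.py site.yml (site.yml here is hypothetical).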