├── logs
    └── .keep
├── ecosystem
├── tests
    ├── __init__.py
    ├── tools
    │   ├── __init__.py
    │   ├── introspector
    │   │   └── __init__.py
    │   ├── _glob_util_test.py
    │   ├── schema_ref_test.py
    │   ├── meteorite_wrappers_test.py
    │   ├── sensu_ttl_alerter_test.py
    │   ├── meteorite_gauge_manager_test.py
    │   └── sensu_alert_manager_test.py
    ├── benchmarks
    │   ├── __init__.py
    │   ├── _fast_uuid_test.py
    │   ├── envelope_test.py
    │   ├── producer_test.py
    │   ├── logging_test.py
    │   └── message_test.py
    ├── consumer
    │   └── __init__.py
    ├── factories
    │   ├── __init__.py
    │   └── base_factory.py
    ├── helpers
    │   ├── __init__.py
    │   ├── mock_utils.py
    │   ├── config.py
    │   └── decorators_test.py
    ├── schematizer_clientlib
    │   └── __init__.py
    ├── team_test.py
    ├── initialization_vector_test.py
    ├── _fast_uuid_test.py
    ├── client_test.py
    ├── _retry_util_test.py
    └── envelope_test.py
├── .ruby-version
├── debian
    ├── compat
    ├── .gitignore
    ├── control
    ├── data-pipeline-tools.links
    └── rules
├── data_pipeline
    ├── tools
    │   ├── .ipython
    │   ├── __init__.py
    │   ├── introspector
    │   │   ├── __init__.py
    │   │   ├── info
    │   │   │   ├── __init__.py
    │   │   │   ├── topic.py
    │   │   │   ├── namespace.py
    │   │   │   └── source.py
    │   │   ├── register
    │   │   │   ├── __init__.py
    │   │   │   ├── base_command.py
    │   │   │   ├── mysql_command.py
    │   │   │   └── avro_command.py
    │   │   ├── list_command
    │   │   │   ├── __init__.py
    │   │   │   ├── base_command.py
    │   │   │   ├── topics.py
    │   │   │   ├── namespaces.py
    │   │   │   └── sources.py
    │   │   ├── models
    │   │   │   ├── __init__.py
    │   │   │   ├── schema.py
    │   │   │   ├── source.py
    │   │   │   ├── namespace.py
    │   │   │   ├── base.py
    │   │   │   └── topic.py
    │   │   ├── register_command_parser.py
    │   │   ├── info_command_parser.py
    │   │   ├── list_command_parser.py
    │   │   ├── main.py
    │   │   └── schema_check_command.py
    │   ├── _glob_util.py
    │   ├── sensu_ttl_alerter.py
    │   ├── meteorite_gauge_manager.py
    │   ├── heartbeat_periodic_processor.py
    │   └── binlog_analyzer.py
    ├── data_pipeline.py
    ├── helpers
    │   ├── __init__.py
    │   ├── lists.py
    │   ├── log.py
    │   ├── singleton.py
    │   ├── frozendict_json_encoder.py
    │   ├── decorators.py
    │   └── yelp_avro_store.py
    ├── servlib
    │   └── __init__.py
    ├── testing_helpers
    │   ├── __init__.py
    │   ├── docker-compose.yml
    │   ├── docker-compose-opensource.yml
    │   └── kafka_docker.py
    ├── schematizer_clientlib
    │   ├── __init__.py
    │   └── models
    │   │   ├── __init__.py
    │   │   ├── model_base.py
    │   │   ├── data_source_type_enum.py
    │   │   ├── target_schema_type_enum.py
    │   │   ├── namespace.py
    │   │   ├── meta_attr_source_mapping.py
    │   │   ├── meta_attr_namespace_mapping.py
    │   │   ├── data_target.py
    │   │   ├── consumer_group_data_source.py
    │   │   ├── source.py
    │   │   ├── consumer_group.py
    │   │   └── avro_schema_element.py
    ├── schemas
    │   ├── initialization_vector_v1.avsc
    │   ├── monitoring_message_v1.avsc
    │   ├── registration_message_v1.avsc
    │   └── envelope_v1.avsc
    ├── __init__.py
    ├── environment_configs.py
    ├── publish_guarantee.py
    ├── _clog_writer.py
    ├── _consumer_tick.py
    ├── initialization_vector.py
    ├── team.py
    ├── expected_frequency.py
    ├── message_type.py
    └── _kafka_util.py
├── setup.cfg
├── docs
    ├── history.rst
    ├── usage.rst
    ├── contributing.rst
    ├── installation.rst
    └── index.rst
├── requirements.d
    ├── pre_commit.txt
    ├── tools.txt
    ├── dev.txt
    └── dev-internal.txt
├── zookeeper_discoverydevc.yaml
├── Gemfile
├── teams.yaml
├── bin
    ├── data_pipeline_tailer
    ├── data_pipeline_introspector
    ├── data_pipeline_compaction_setter
    ├── data_pipeline_refresh_manager
    ├── data_pipeline_refresh_requester
    └── data_pipeline_refresh_runner
├── docker-compose.yml
├── .rat-excludes
├── .dockerignore
├── NOTICE
├── travis.yml
├── MANIFEST.in
├── .travis.yml
├── Procfile
├── HISTORY.rst
├── USAGE.rst
├── yelp_package
    ├── dockerfiles
    │   ├── lucid
    │   │   └── Dockerfile
    │   └── trusty
    │   │   └── Dockerfile
    ├── Makefile
    └── itest
    │   └── ubuntu.sh
├── .coveragerc
├── run_guard.sh
├── .pre-commit-config.yaml
├── setup_bundles.sh
├── .gitignore
├── Gemfile.lock
├── Dockerfile
├── Makefile-opensource
├── tox-opensource.ini
├── Guardfile
├── requirements.txt
├── tox.ini
├── Makefile
├── key-1.key
└── CONTRIBUTING.rst


/logs/.keep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ecosystem:
--------------------------------------------------------------------------------
1 | devc
2 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.ruby-version:
--------------------------------------------------------------------------------
1 | 2.2.2
2 | 


--------------------------------------------------------------------------------
/debian/compat:
--------------------------------------------------------------------------------
1 | 7
2 | 


--------------------------------------------------------------------------------
/tests/tools/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/.ipython:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/benchmarks/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/consumer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/factories/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/helpers/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data_pipeline/data_pipeline.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data_pipeline/helpers/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data_pipeline/servlib/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [wheel]
2 | universal = 1


--------------------------------------------------------------------------------
/tests/tools/introspector/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data_pipeline/testing_helpers/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/schematizer_clientlib/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docs/history.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../HISTORY.rst
2 | 


--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../USAGE.rst
2 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/info/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/register/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements.d/pre_commit.txt:
--------------------------------------------------------------------------------
1 | pre-commit>=0.4.2
2 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/list_command/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docs/contributing.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../CONTRIBUTING.rst
2 | 


--------------------------------------------------------------------------------
/zookeeper_discoverydevc.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | - - zk
3 |   - 2181
4 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 | 
3 | gem 'foreman'
4 | gem 'guard'
5 | 


--------------------------------------------------------------------------------
/debian/.gitignore:
--------------------------------------------------------------------------------
1 | data-pipeline-tools.substvars
2 | data-pipeline-tools/
3 | files
4 | 


--------------------------------------------------------------------------------
/requirements.d/tools.txt:
--------------------------------------------------------------------------------
1 | psutil==4.2.0
2 | sqlparse
3 | yelp_batch
4 | yelp_clog==2.5.2
5 | yelp_conn
6 | 


--------------------------------------------------------------------------------
/teams.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | teams:
3 |   bam:
4 |     email: team1@yelp.com
5 |   team2:
6 |     email: team2@yelp.com
7 | 


--------------------------------------------------------------------------------
/bin/data_pipeline_tailer:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from data_pipeline.tools.tailer import Tailer
3 | 
4 | Tailer().start()
5 | 


--------------------------------------------------------------------------------
/bin/data_pipeline_introspector:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from data_pipeline.tools.introspector.main import run
3 | 
4 | run()
5 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | pypy:
2 |   build: .
3 |   links:
4 |     - zookeeper:zk
5 |     - kafka:kafka
6 |     - schematizer:schematizer
7 | 


--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Installation
3 | ============
4 | 
5 | At the command line, install via pip::
6 | 
7 |     $ pip install data_pipeline
8 | 


--------------------------------------------------------------------------------
/bin/data_pipeline_compaction_setter:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from data_pipeline.tools.compaction_setter import CompactionSetter
3 | 
4 | CompactionSetter().start()
5 | 


--------------------------------------------------------------------------------
/bin/data_pipeline_refresh_manager:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from data_pipeline.tools.refresh_manager import FullRefreshManager
3 | 
4 | FullRefreshManager().start()
5 | 


--------------------------------------------------------------------------------
/.rat-excludes:
--------------------------------------------------------------------------------
 1 | build
 2 | dist
 3 | venv
 4 | .tox
 5 | data_pipeline.egg-info/
 6 | __pycache__
 7 | .cache/
 8 | logs
 9 | debian
10 | .pip
11 | .distlib
12 | bin
13 | 


--------------------------------------------------------------------------------
/bin/data_pipeline_refresh_requester:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from data_pipeline.tools.refresh_requester import FullRefreshRequester
3 | 
4 | FullRefreshRequester().start()
5 | 


--------------------------------------------------------------------------------
/bin/data_pipeline_refresh_runner:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from data_pipeline.tools.copy_table_to_blackhole_table import FullRefreshRunner
3 | 
4 | FullRefreshRunner().start()
5 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | .git
 2 | .gitignore
 3 | .tox
 4 | venv
 5 | virtualenv_run
 6 | build
 7 | dist
 8 | *.pyc
 9 | *.log
10 | *.swp
11 | Dockerfile
12 | docker-compose.yml
13 | .docker
14 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Apache Data Pipeline
2 | Copyright 2016 The Apache Software Foundation
3 | 
4 | This product includes software developed at
5 | The Apache Software Foundation (http://www.apache.org/).
6 | 


--------------------------------------------------------------------------------
/data_pipeline/schemas/initialization_vector_v1.avsc:
--------------------------------------------------------------------------------
1 | {
2 |   "type": "fixed",
3 |   "size": 16,
4 |   "namespace": "yelp.data_pipeline",
5 |   "name": "initialization_vector",
6 |   "doc": "Serializes an initalization vector for encrypting PII."
7 | }
8 | 


--------------------------------------------------------------------------------
/travis.yml:
--------------------------------------------------------------------------------
 1 | branches:
 2 |   only:
 3 |     - master
 4 |     - "/^v[0-9.]+$/"
 5 | language: python
 6 | python:
 7 |   - '2.7'
 8 | install: pip install coveralls tox
 9 | script: make -f Makefile-opensource test
10 | after_success:
11 |   - coveralls
12 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include data_pipeline/schemas/envelope_v1.avsc
2 | include data_pipeline/schemas/monitoring_message_v1.avsc
3 | include data_pipeline/testing_helpers/docker-compose.yml
4 | include CONTRIBUTING.rst
5 | include HISTORY.rst
6 | include README.md
7 | include USAGE.rst
8 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | branches:
 2 |   only:
 3 |   - master
 4 |   - /^v[0-9.]+$/
 5 | language: python
 6 | services:
 7 |   - docker
 8 | python:
 9 |   - '2.7'
10 | install: pip install coveralls tox
11 | script:
12 |   - make -f Makefile-opensource test
13 | after_success:
14 |   - coveralls
15 | 


--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | # docs: python -m SimpleHTTPServer 8001
2 | docs: twistd -no web -p 8001 --path=.
3 | kafka: tox -e devenv-command "$(make compose-prefix) kill" && tox -e devenv-command "$(make compose-prefix) rm --force" && tox -e devenv-command "$(make compose-prefix) up kafka schematizer"
4 | 


--------------------------------------------------------------------------------
/HISTORY.rst:
--------------------------------------------------------------------------------
 1 | .. :changelog:
 2 | 
 3 | History
 4 | -------
 5 | 
 6 | 0.1.4 (2015-08-12)
 7 | ++++++++++++++++++
 8 | 
 9 | * Defined consumer/producer registration API
10 | 
11 | 0.1.3 (2015-08-10)
12 | ++++++++++++++++++
13 | 
14 | * Added keys kwargs to data pipeline messages
15 | 
16 | 0.1.0 (2015-03-01)
17 | ++++++++++++++++++
18 | 
19 | * First release.
20 | 


--------------------------------------------------------------------------------
/debian/control:
--------------------------------------------------------------------------------
 1 | Source: data-pipeline-tools
 2 | Section: python
 3 | Priority: optional
 4 | Maintainer: Justin Cunningham <justinc@yelp.com>
 5 | Build-Depends: debhelper (>= 7), python (>= 2.7), dh-virtualenv, libffi-dev, libssl0.9.8
 6 | Standards-Version: 3.8.3
 7 | 
 8 | Package: data-pipeline-tools
 9 | Architecture: any
10 | Depends: ${shlibs:Depends}, ${misc:Depends}, libssl0.9.8
11 | Description: Data pipeline clientlib tools
12 | 


--------------------------------------------------------------------------------
/requirements.d/dev.txt:
--------------------------------------------------------------------------------
 1 | # Installs the package with the testing_helpers extras
 2 | -e .[testing_helpers]
 3 | # TODO(DATAPIPE-167): Add the packages that don't exist yet to the internal PyPI
 4 | # and remove this extra-index-url
 5 | --extra-index-url https://pypi.python.org/simple/
 6 | flake8
 7 | honcho
 8 | ipdb
 9 | ipython
10 | mock
11 | pre-commit>=0.4.2
12 | pytest<2.8.2
13 | pytest-cov==1.8.1
14 | pytest-raisesregexp
15 | Sphinx==1.3.1
16 | SQLAlchemy==1.0.8
17 | terminaltables>=1.0.2
18 | wheel>=0.22
19 | factory_boy==2.5.2
20 | 


--------------------------------------------------------------------------------
/debian/data-pipeline-tools.links:
--------------------------------------------------------------------------------
1 | opt/venvs/data-pipeline-tools/bin/data_pipeline_tailer usr/bin/data_pipeline_tailer
2 | opt/venvs/data-pipeline-tools/bin/data_pipeline_refresh_runner usr/bin/data_pipeline_refresh_runner
3 | opt/venvs/data-pipeline-tools/bin/data_pipeline_refresh_manager usr/bin/data_pipeline_refresh_manager
4 | opt/venvs/data-pipeline-tools/bin/data_pipeline_refresh_requester usr/bin/data_pipeline_refresh_requester
5 | opt/venvs/data-pipeline-tools/bin/data_pipeline_compaction_setter usr/bin/data_pipeline_compaction_setter
6 | opt/venvs/data-pipeline-tools/bin/data_pipeline_introspector usr/bin/data_pipeline_introspector
7 | 


--------------------------------------------------------------------------------
/requirements.d/dev-internal.txt:
--------------------------------------------------------------------------------
 1 | # Installs the package with the testing_helpers, tools and internal extras
 2 | -e .[testing_helpers, tools, internal]
 3 | # TODO(DATAPIPE-167): Add the packages that don't exist yet to the internal PyPI
 4 | # and remove this extra-index-url
 5 | --extra-index-url https://pypi.python.org/simple/
 6 | flake8
 7 | honcho
 8 | ipdb
 9 | ipython
10 | mock
11 | pre-commit>=0.4.2
12 | # Later versions should be ok once INFRA-3779 is fixed
13 | pytest<2.8.2
14 | pytest-cov==1.8.1
15 | pytest-raisesregexp
16 | Sphinx==1.3.1
17 | SQLAlchemy==1.0.8
18 | terminaltables>=1.0.2
19 | wheel>=0.22
20 | factory_boy==2.5.2
21 | pytest-benchmark==3.0.0
22 | 


--------------------------------------------------------------------------------
/USAGE.rst:
--------------------------------------------------------------------------------
 1 | ========
 2 | Usage
 3 | ========
 4 | 
 5 | To use Data Pipeline Clientlib in a project::
 6 | 
 7 |     >>> import data_pipeline
 8 | 
 9 | To use a Consumer::
10 | 
11 |     >>> from data_pipeline.consumer import Consumer
12 |     >>> from data_pipeline.expected_frequency import ExpectedFrequency
13 |     >>> Consumer(
14 |     ...     'test',
15 |     ...     'bam',
16 |     ...     ExpectedFrequency.constantly,
17 |     ...     {'topic_name': None}
18 |     ... ) # doctest: +ELLIPSIS
19 |     <data_pipeline.consumer.Consumer object at 0x...>
20 | 
21 | To create an Envelope::
22 | 
23 |     >>> from data_pipeline.envelope import Envelope
24 |     >>> envelope = Envelope()
25 | 


--------------------------------------------------------------------------------
/data_pipeline/testing_helpers/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | zookeeper:
 2 |   image: wurstmeister/zookeeper
 3 | kafka:
 4 |   image: jcnnghm/kafka:latest
 5 |   links:
 6 |     - zookeeper:zk
 7 |   environment:
 8 |     KAFKA_BROKER_ID: 1
 9 |     KAFKA_ADVERTISED_PORT: 9092
10 | schematizerdatabase:
11 |   image: docker-dev.yelpcorp.com/schematizer_database:latest
12 | schematizerconfigs:
13 |   image: docker-dev.yelpcorp.com/schematizer_configs:latest
14 | schematizer:
15 |   image: docker-dev.yelpcorp.com/schematizer_service:latest
16 |   links:
17 |     - schematizerdatabase
18 |   volumes_from:
19 |     - schematizerconfigs
20 |   command: "/code/virtualenv_run/bin/python /code/serviceinitd/internal_schematizer start-dev"
21 | 


--------------------------------------------------------------------------------
/yelp_package/dockerfiles/lucid/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM docker-dev.yelpcorp.com/lucid_yelp
 2 | MAINTAINER Justin Cunningham <justinc@yelp.com>
 3 | # Heavily based on kwa's work for paasta-tools
 4 | 
 5 | # Make sure we get a package suitable for building this package correctly.
 6 | # Per dnephin we need https://github.com/spotify/dh-virtualenv/pull/20
 7 | # Which at this time is in this package
 8 | RUN apt-get update && apt-get -y install dpkg-dev python-tox python-setuptools \
 9 |   python-dev debhelper dh-virtualenv python-yaml python-pytest \
10 |   pyflakes python2.7 python2.7-dev help2man libffi-dev uuid-dev libuuid1 git \
11 |   libmysqlclient-dev
12 | 
13 | ENV HOME /work
14 | ENV PWD /work
15 | WORKDIR /work
16 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | branch = True
 3 | source =
 4 |     .
 5 | omit =
 6 |     .tox/*
 7 |     /usr/*
 8 |     */tmp*
 9 |     setup.py
10 |     # Don't complain if non-runnable code isn't run
11 |     */__main__.py
12 | 
13 | [report]
14 | exclude_lines =
15 |     # Have to re-enable the standard pragma
16 |     \#\s*pragma: no cover
17 | 
18 |     # Don't complain if tests don't hit defensive assertion code:
19 |     ^\s*raise AssertionError\b
20 |     ^\s*raise NotImplementedError\b
21 |     ^\s*return NotImplemented\b
22 |     ^\s*raise$
23 | 
24 |     # Don't complain if non-runnable code isn't run:
25 |     ^if __name__ == ['"]__main__['"]:$
26 | 
27 | [html]
28 | directory = coverage-html
29 | 
30 | # vim:ft=dosini
31 | 


--------------------------------------------------------------------------------
/yelp_package/dockerfiles/trusty/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM docker-dev.yelpcorp.com/trusty_yelp
 2 | MAINTAINER Justin Cunningham <justinc@yelp.com>
 3 | # Heavily based on kwa's work for paasta-tools
 4 | 
 5 | # Make sure we get a package suitable for building this package correctly.
 6 | # Per dnephin we need https://github.com/spotify/dh-virtualenv/pull/20
 7 | # Which at this time is in this package
 8 | RUN apt-get update && apt-get -y install dpkg-dev python-tox python-setuptools \
 9 |   python-dev debhelper dh-virtualenv python-yaml python-pytest \
10 |   pyflakes python2.7 python2.7-dev help2man libffi-dev uuid-dev libuuid1 \
11 |   libssl0.9.8 git libmysqlclient-dev libssl-dev
12 | 
13 | ENV HOME /work
14 | ENV PWD /work
15 | WORKDIR /work
16 | 


--------------------------------------------------------------------------------
/run_guard.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | 
17 | ./setup_bundles.sh
18 | export RBENV_ROOT=$HOME/.rbenv
19 | eval "$(rbenv init -)"
20 | bundle exec guard
21 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. Data Pipeline Clientlib documentation master file, created by
 2 |    sphinx-quickstart on Tue Jul  9 22:26:36 2013.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Contents:
 7 | =========
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 2
11 | 
12 |    installation
13 |    usage
14 |    contributing
15 |    history
16 |    code/data_pipeline
17 | 
18 | Feedback
19 | ========
20 | 
21 | If you have any suggestions or questions about **Data Pipeline Clientlib** feel free to email me
22 | at justinc@yelp.com.
23 | 
24 | If you encounter any errors or problems with **Data Pipeline Clientlib**, please let me know!
25 | Create an issue on Jira under the **DATAPIPE** project at
26 | https://jira.yelpcorp.com/browse/DATAPIPE.
27 | 


--------------------------------------------------------------------------------
/debian/rules:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/make -f
 2 | # -*- makefile -*-
 3 | 
 4 | # Verbose mode is enabled; comment this out to turn it off.
 5 | export DH_VERBOSE=1
 6 | 
 7 | # This has to be exported to make some magic below work.
 8 | export DH_OPTIONS
 9 | export DH_VIRTUALENV_INSTALL_ROOT=/opt/venvs
10 | 
11 | %:
12 | 	dh $@ --with python-virtualenv
13 | 
14 | # do not call `make clean` as part of packaging
15 | override_dh_auto_clean:
16 | 	true
17 | 
18 | # Don't let debian build stuff, but we do hook in here to make man pages
19 | override_dh_auto_build:
20 | 	true
21 | 
22 | # do not call `make test` as part of packaging
23 | override_dh_auto_test:
24 | 	true
25 | 
26 | override_dh_virtualenv:
27 | 	dh_virtualenv -v --pypi-url='https://pypi.yelpcorp.com/simple' --extra-index-url='https://pypi.python.org/simple/' --python=/usr/bin/python2.7 --extra-pip-arg '--no-use-wheel'
28 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/models/model_base.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | 
20 | class BaseModel(object):
21 |     pass
22 | 


--------------------------------------------------------------------------------
/data_pipeline/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | __author__ = 'Justin Cunningham'
20 | __email__ = 'bam@yelp.com'
21 | __version__ = '0.9.13'
22 | 


--------------------------------------------------------------------------------
/data_pipeline/environment_configs.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import os
20 | 
# True only when the OPEN_SOURCE_MODE environment variable holds one of the
# accepted "yes" spellings (compared case-insensitively); unset means False.
IS_OPEN_SOURCE_MODE = (
    os.getenv('OPEN_SOURCE_MODE', 'false').lower()
    in ('t', 'true', 'y', 'yes')
)
22 | 


--------------------------------------------------------------------------------
/data_pipeline/testing_helpers/docker-compose-opensource.yml:
--------------------------------------------------------------------------------
 1 | zookeeper:
 2 |   image: wurstmeister/zookeeper
 3 | kafka:
 4 |   image: jcnnghm/kafka:latest
 5 |   links:
 6 |     - zookeeper:zk
 7 |   environment:
 8 |     KAFKA_BROKER_ID: 1
 9 |     KAFKA_ADVERTISED_PORT: 9092
10 | 
# TODO (DATAPIPE-1858|abrar): change all the yelpcorp URLs to Docker Hub once we
# push our Docker images to Docker Hub.
13 | schematizerdatabase:
14 |   image: docker.io/yelp/schematizer_database:latest
15 | schematizerconfigs:
16 |   image: docker.io/yelp/schematizer_configs:latest
17 | schematizer:
18 |   image: docker.io/yelp/schematizer_service:latest
19 |   links:
20 |     - schematizerdatabase
21 |   volumes_from:
22 |     - schematizerconfigs
23 |   command: "/code/virtualenv_run/bin/python -m serviceinitd.schematizer"
24 |   environment:
25 |     - SERVICE_CONFIG_PATH=config-open-source.yaml
26 |     - FORCE_AVOID_INTERNAL_PACKAGES=True
27 | 


--------------------------------------------------------------------------------
/tests/helpers/mock_utils.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import mock
20 | 
21 | 
def attach_spy_on_func(target, attribute):
    """Build a patcher that spies on ``attribute`` of ``target``.

    The attribute is patched with a mock whose ``side_effect`` is the
    attribute's current value, so calls are recorded on the mock while still
    being forwarded to the original callable.

    Args:
        target: object owning the attribute to spy on.
        attribute: name of the attribute on ``target``.

    Returns:
        The object returned by ``mock.patch.object``.
    """
    return mock.patch.object(
        target,
        attribute,
        side_effect=getattr(target, attribute),
    )
25 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/models/data_source_type_enum.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from enum import Enum
20 | 
21 | 
class DataSourceTypeEnum(Enum):
    """Eligible data source types."""

    # Each member pairs a data source kind with its integer value.
    Namespace = 1
    Source = 2
27 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/models/target_schema_type_enum.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from enum import Enum
20 | 
21 | 
class TargetSchemaTypeEnum(Enum):
    """Eligible target schema types."""

    # Each member pairs a target schema kind with its integer value;
    # `unsupported` is the zero-valued member.
    unsupported = 0
    redshift = 1
27 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | -   repo: https://github.com/pre-commit/pre-commit-hooks
 2 |     sha: v0.6.0
 3 |     hooks:
 4 |     -   id: trailing-whitespace
 5 |     -   id: end-of-file-fixer
 6 |     -   id: autopep8-wrapper
 7 |     -   id: check-yaml
 8 |     -   id: debug-statements
 9 |         language_version: python2.7
10 |     -   id: flake8
11 |         language_version: python2.7
12 |     -   id: check-yaml
13 |     -   id: check-json
14 |     -   id: check-merge-conflict
15 |     -   id: name-tests-test
16 |         exclude: (tests/helpers/(.+).py)|(tests/factories/(.+).py)
17 |     -   id: fix-encoding-pragma
18 |     -   id: check-added-large-files
19 |     -   id: check-byte-order-marker
20 | -   repo: https://github.com/asottile/reorder_python_imports
21 |     sha: v0.3.0
22 |     hooks:
23 |     -   id: reorder-python-imports
24 |         language_version: python2.7
25 |         args:
26 |         - --add-import
27 |         - from __future__ import absolute_import
28 |         - --add-import
29 |         - from __future__ import unicode_literals
30 | 


--------------------------------------------------------------------------------
/setup_bundles.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | 
# Set up Ruby 2.2.2 through rbenv, then install bundler and the bundle into
# ./.bundle. All rbenv paths are derived from RBENV_ROOT and quoted so that a
# home directory containing spaces does not get word-split.
export RBENV_ROOT="$HOME/.rbenv"
# ruby-build supplies `rbenv install`; clone it only when it is missing.
if [ ! -d "$RBENV_ROOT/plugins/ruby-build" ]; then
    git clone https://github.com/sstephenson/ruby-build.git "$RBENV_ROOT/plugins/ruby-build"
fi
# -s: skip the installation if this version is already present.
rbenv install 2.2.2 -s
rbenv local 2.2.2
rbenv rehash
eval "$(rbenv init -)"
gem install bundler
# Rehash and re-init again so the newly installed bundler shim is picked up.
rbenv rehash
eval "$(rbenv init -)"
bundle install --path=.bundle
29 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | # flake8: noqa
17 | from __future__ import absolute_import
18 | from __future__ import unicode_literals
19 | 
20 | from data_pipeline.tools.introspector.models.namespace import *
21 | from data_pipeline.tools.introspector.models.schema import *
22 | from data_pipeline.tools.introspector.models.source import *
23 | from data_pipeline.tools.introspector.models.topic import *
24 | 


--------------------------------------------------------------------------------
/tests/team_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import pytest
20 | 
21 | from data_pipeline.team import Team
22 | 
23 | 
@pytest.mark.usefixtures('configure_teams')
class TestTeam(object):
    """Checks ``Team.exists`` for one known and one unknown team name."""

    def test_team_exists(self):
        # NOTE(review): 'bam' is presumably registered by the
        # `configure_teams` fixture -- confirm against the fixture.
        assert Team.exists('bam')

    def test_team_does_not_exist(self):
        # A name that is not expected to be configured must report False.
        assert not Team.exists('fake_team')
31 | 


--------------------------------------------------------------------------------
/data_pipeline/helpers/lists.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | """
17 | Utility methods for manipulating lists.
18 | """
19 | from __future__ import absolute_import
20 | from __future__ import unicode_literals
21 | 
22 | 
def unlist(a_list):
    """Collapse a list of at most one element into that element.

    Args:
        a_list: a sequence holding zero or one item.

    Returns:
        The sole item, or None when the sequence is empty.

    Raises:
        ValueError: if the sequence holds more than one item; the exception
            argument is the sequence length.
    """
    item_count = len(a_list)
    if item_count > 1:
        raise ValueError(item_count)

    return a_list[0] if item_count == 1 else None
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | *~
 3 | .#*
 4 | ._*
 5 | \#*\#
 6 | .cache
 7 | 
 8 | # C extensions
 9 | *.so
10 | 
# Development Environment
12 | virtualenv_run
13 | venv
14 | 
15 | # Packages
16 | *.egg
17 | *.egg-info
18 | dist
19 | build
20 | eggs
21 | parts
22 | var
23 | sdist
24 | develop-eggs
25 | .installed.cfg
26 | lib
27 | lib64
28 | *.iml
29 | *.log
30 | /.venv.touch
31 | /.venv.docs.touch
32 | /virtualenv_run
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | 
37 | # Unit test / coverage reports
38 | .coverage*
39 | /coverage-html
40 | .tox
41 | nosetests.xml
42 | 
43 | # Translations
44 | *.mo
45 | 
46 | # Mr Developer
47 | .mr.developer.cfg
48 | .project
49 | .pydevproject
50 | 
51 | # Complexity
52 | output/*.html
53 | output/*/index.html
54 | 
55 | # Sphinx
56 | docs/build
57 | docs/code
58 | 
59 | # Vim
60 | *.sw[nop]
61 | .ropeproject
62 | 
63 | # idea
64 | .idea/
65 | 
66 | # Cache
67 | __pycache__
68 | 
69 | # tmp
70 | tmp
71 | out.txt
72 | scratch
73 | 
74 | # Sublime
75 | *.sublime-*
76 | 
# Artifacts
78 | twistd.pid
79 | logs/*.log
80 | .bundle
81 | 
82 | # OS X
83 | .DS_Store
84 | 
85 | #vim tags
86 | tags
87 | 


--------------------------------------------------------------------------------
/data_pipeline/helpers/log.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import logging
20 | 
21 | from data_pipeline.config import get_config
22 | 
23 | 
def debug_log(line_lambda, exc_info=None):
    """Emit a debug log line whose text is built lazily.

    ``line_lambda`` is only invoked when the configured logger has DEBUG
    enabled, so expensive message formatting is skipped otherwise
    (background in DATAPIPE-979).

    Args:
        line_lambda: zero-argument callable returning the log message.
        exc_info: forwarded unchanged to the logger's ``debug`` call.
    """
    if not get_config().logger.isEnabledFor(logging.DEBUG):
        return
    get_config().logger.debug(line_lambda(), exc_info=exc_info)
30 | 


--------------------------------------------------------------------------------
/data_pipeline/helpers/singleton.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | 
# This metaclass is from http://stackoverflow.com/questions/6760685/creating-a-singleton-in-python
class Singleton(type):
    """Metaclass that hands out a single instance per class.

    The first call to a class using this metaclass constructs the instance
    with the given arguments; every later call returns that same instance
    and ignores its arguments.
    """

    # Maps each class to the one instance created for it.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        registry = cls._instances
        if cls in registry:
            return registry[cls]
        created = super(Singleton, cls).__call__(*args, **kwargs)
        registry[cls] = created
        return created
28 | 


--------------------------------------------------------------------------------
/data_pipeline/helpers/frozendict_json_encoder.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import json
20 | 
21 | from frozendict import frozendict
22 | 
23 | 
class FrozenDictEncoder(json.JSONEncoder):
    """JSON encoder that can serialize ``frozendict`` objects."""

    def default(self, obj):
        """Return a plain ``dict`` copy for frozendicts.

        Any other object is handed to the stock ``JSONEncoder.default``.
        """
        if not isinstance(obj, frozendict):
            return json.JSONEncoder.default(self, obj)
        return dict(obj)
32 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/_glob_util.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import glob
20 | 
21 | 
def get_file_paths_from_glob_patterns(glob_patterns):
    """Collect every path matched by any of the given glob patterns.

    Args:
        glob_patterns: iterable of glob pattern strings
            (for example ["./test.sql", "./other_tables/*.sql"]).

    Returns:
        A set of matching paths; a path matched by several patterns appears
        once.
    """
    return {
        matched_path
        for pattern in glob_patterns
        for matched_path in glob.glob(pattern)
    }
30 | 


--------------------------------------------------------------------------------
/Gemfile.lock:
--------------------------------------------------------------------------------
 1 | GEM
 2 |   remote: https://rubygems.org/
 3 |   specs:
 4 |     celluloid (0.16.0)
 5 |       timers (~> 4.0.0)
 6 |     coderay (1.1.0)
 7 |     ffi (1.9.8)
 8 |     foreman (0.78.0)
 9 |       thor (~> 0.19.1)
10 |     formatador (0.2.5)
11 |     guard (2.12.5)
12 |       formatador (>= 0.2.4)
13 |       listen (~> 2.7)
14 |       lumberjack (~> 1.0)
15 |       nenv (~> 0.1)
16 |       notiffany (~> 0.0)
17 |       pry (>= 0.9.12)
18 |       shellany (~> 0.0)
19 |       thor (>= 0.18.1)
20 |     hitimes (1.2.2)
21 |     listen (2.10.0)
22 |       celluloid (~> 0.16.0)
23 |       rb-fsevent (>= 0.9.3)
24 |       rb-inotify (>= 0.9)
25 |     lumberjack (1.0.9)
26 |     method_source (0.8.2)
27 |     nenv (0.2.0)
28 |     notiffany (0.0.6)
29 |       nenv (~> 0.1)
30 |       shellany (~> 0.0)
31 |     pry (0.10.1)
32 |       coderay (~> 1.1.0)
33 |       method_source (~> 0.8.1)
34 |       slop (~> 3.4)
35 |     rb-fsevent (0.9.4)
36 |     rb-inotify (0.9.5)
37 |       ffi (>= 0.5.0)
38 |     shellany (0.0.1)
39 |     slop (3.6.0)
40 |     thor (0.19.1)
41 |     timers (4.0.1)
42 |       hitimes
43 | 
44 | PLATFORMS
45 |   ruby
46 | 
47 | DEPENDENCIES
48 |   foreman
49 |   guard
50 | 


--------------------------------------------------------------------------------
/data_pipeline/publish_guarantee.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from enum import Enum
20 | 
21 | 
class PublishGuaranteeEnum(Enum):
    """Enum that specifies which message publishing guarantee is provided
    by the producer.

    Attributes:
      exact_once: message will be successfully published exactly once.
      at_least_once: message will be successfully published at least once.
    """

    exact_once = 0
    at_least_once = 1
33 | 


--------------------------------------------------------------------------------
/data_pipeline/_clog_writer.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import clog
20 | 
21 | from data_pipeline.config import get_config
22 | from data_pipeline.envelope import Envelope
23 | 
24 | logger = get_config().logger
25 | 
26 | 
class ClogWriter(object):
    """Publishes messages as log lines through ``clog``.

    Each message is packed with an ``Envelope`` and written to the log
    stream named by the message's topic.
    """

    def __init__(self):
        self.envelope = Envelope()

    def publish(self, message):
        """Pack ``message`` and write it to ``message.topic`` via clog.

        Publishing is best effort: a failure is logged, not raised.

        Args:
            message: object exposing ``topic`` and accepted by
                ``Envelope.pack``.
        """
        try:
            clog.log_line(message.topic, self.envelope.pack(message, ascii_encoded=True))
        except Exception:
            # Catch Exception instead of using a bare except so that
            # KeyboardInterrupt and SystemExit still propagate, and attach
            # the traceback so the cause of the failure is not lost.
            logger.error(
                "Failed to scribe message - {}".format(str(message)),
                exc_info=True
            )
37 | 


--------------------------------------------------------------------------------
/tests/tools/_glob_util_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import mock
20 | 
21 | from data_pipeline.tools._glob_util import get_file_paths_from_glob_patterns
22 | 
23 | 
def test_get_file_paths_from_glob_patterns():
    """Each pattern is globbed once, in order, and results merge into a set."""
    # Replace the `glob` module as seen from _glob_util for the whole test.
    with mock.patch('data_pipeline.tools._glob_util.glob') as mock_glob:
        # Every glob.glob call yields the same single path.
        mock_glob.glob = mock.Mock(return_value=['test'])
        paths = get_file_paths_from_glob_patterns(['*.sql', 'some/dir/*.avsc'])
        # The duplicate match from the two patterns collapses to one entry.
        assert paths == {'test'}
        assert mock_glob.glob.mock_calls == [
            mock.call('*.sql'),
            mock.call('some/dir/*.avsc'),
        ]
33 | 


--------------------------------------------------------------------------------
/yelp_package/Makefile:
--------------------------------------------------------------------------------
 1 | UID:=`id -u`
 2 | GID:=`id -g`
 3 | PACKAGE:=data_pipeline
 4 | DOCKER_RUN_LUCID:=docker run -t -v  $(CURDIR)/../:/work:rw $(PACKAGE)_lucid_container
 5 | DOCKER_RUN_TRUSTY:=docker run -t -v  $(CURDIR)/../:/work:rw $(PACKAGE)_trusty_container
 6 | DOCKER_QUICK_START:=docker run -t -i -v $(CURDIR)/../:/work:rw $(PACKAGE)_lucid_container
 7 | 
 8 | build_lucid_docker:
 9 | 	[ -d ../dist ] || mkdir ../dist
10 | 	cd dockerfiles/lucid/ && docker build -t "$(PACKAGE)_lucid_container" .
11 | package_lucid: build_lucid_docker
12 | 	$(DOCKER_RUN_LUCID) /bin/bash -c "dpkg-buildpackage -d && mv ../*.deb dist/"
13 | 	$(DOCKER_RUN_LUCID) chown -R $(UID):$(GID) /work
14 | itest_lucid: package_lucid
15 | 	$(DOCKER_RUN_LUCID) /work/yelp_package/itest/ubuntu.sh
16 | 
17 | 
18 | build_trusty_docker:
19 | 	[ -d ../dist ] || mkdir ../dist
20 | 	cd dockerfiles/trusty/ && docker build -t "$(PACKAGE)_trusty_container" .
21 | package_trusty: build_trusty_docker
22 | 	$(DOCKER_RUN_TRUSTY) /bin/bash -c "dpkg-buildpackage -d && mv ../*.deb dist/"
23 | 	$(DOCKER_RUN_TRUSTY) chown -R $(UID):$(GID) /work
24 | itest_trusty: package_trusty
25 | 	$(DOCKER_RUN_TRUSTY) /work/yelp_package/itest/ubuntu.sh
26 | 
27 | quick_start:
28 | 	$(DOCKER_QUICK_START) /bin/bash
29 | 
30 | clean:
31 | 	rm -rf dist/
32 | 	find . -name '*.pyc' -delete
33 | 	find . -name '__pycache__' -delete
34 | 


--------------------------------------------------------------------------------
/yelp_package/itest/ubuntu.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
# Abort the test run as soon as any command fails.
set -e

# To add additional scripts, just add lines here
SCRIPTS="data_pipeline_tailer
data_pipeline_refresh_runner
data_pipeline_refresh_manager
data_pipeline_refresh_requester
data_pipeline_compaction_setter
data_pipeline_introspector"

# Install the package(s) found in the dist directory.
if dpkg -i /work/dist/*.deb; then
  echo "Looks like it installed correctly"
else
  echo "Dpkg install failed"
  exit 1
fi

# Every listed script must be on PATH and must run successfully with -h.
for scr in $SCRIPTS
do
  # NOTE(review): `exit 1` inside ( ... ) only leaves the subshell; the
  # script stops here presumably because of `set -e` above -- confirm.
  which $scr >/dev/null || (echo "$scr failed to install!"; exit 1)
  echo "Running '$scr -h' to make sure it works"
  $scr -h >/dev/null || (echo "$scr failed to execute!"; exit 1)
done

echo "Everything worked!"
41 | 


--------------------------------------------------------------------------------
/tests/benchmarks/_fast_uuid_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import pytest
20 | 
21 | from data_pipeline._fast_uuid import FastUUID
22 | 
23 | 
@pytest.mark.usefixtures(
    "config_benchmark_containers_connections"
)
@pytest.mark.benchmark
class TestBenchFastUUID(object):
    """Benchmarks for ``FastUUID.uuid1`` and ``FastUUID.uuid4``."""

    @pytest.fixture
    def fuuid(self):
        # A new FastUUID instance for each test that requests this fixture.
        return FastUUID()

    def test_uuid1(self, benchmark, fuuid):

        # NOTE(review): decorating with the `benchmark` fixture presumably
        # runs and times `create` -- confirm against the benchmark plugin.
        @benchmark
        def create():
            fuuid.uuid1()

    def test_uuid4(self, benchmark, fuuid):

        # Same pattern as test_uuid1, exercising uuid4 instead.
        @benchmark
        def create():
            fuuid.uuid4()
45 | 


--------------------------------------------------------------------------------
/data_pipeline/_consumer_tick.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import time
20 | 
21 | 
22 | class _ConsumerTick(object):
23 |     """ This class manages state related to ticks and triggers ticks on the
24 |     attached Consumer every refresh_time_seconds. It can be used by
25 |     refresh_new_topics and other methods requiring tick functionality.
26 |     """
27 | 
28 |     def __init__(self, refresh_time_seconds):
29 |         self.refresh_time_seconds = refresh_time_seconds
30 |         self._reset()
31 | 
32 |     def _reset(self):
33 |         self.next_tick_time = time.time() + self.refresh_time_seconds
34 | 
35 |     def should_tick(self):
36 |         return time.time() >= self.next_tick_time
37 | 


--------------------------------------------------------------------------------
/data_pipeline/schemas/monitoring_message_v1.avsc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "type": "record",
 3 |   "namespace": "yelp.data_pipeline",
 4 |   "name": "monitoring_message",
 5 |   "doc": "Monitoring message that counts the number of messages that are produced/consumed by specific client in a given time frame",
 6 |   "fields": [
 7 |     {
 8 |       "name": "topic",
 9 |       "type": "string",
10 |       "doc": "topic associated with the messages produced/consumed by client"
11 |     },
12 |     {
13 |       "name": "client_name",
14 |       "type": "string",
15 |       "doc": "name of the client that is publishing/consuming messages"
16 |     },
17 |     {
18 |       "name": "client_type",
19 |       "type": {
20 |         "name": "client_type",
21 |         "doc": "ENUM of Client Types",
22 |         "type": "enum",
23 |         "symbols": [
24 |           "producer",
25 |           "consumer"
26 |         ]
27 |       },
28 |       "doc": "identifies the type of client: a 'consumer' or a 'producer'"
29 |     },
30 |     {
31 |       "name": "message_count",
32 |       "type": "int",
33 |       "doc": "number of messages published/consumed by the client"
34 |     },
35 |     {
36 |       "name": "start_timestamp",
37 |       "type": "int",
38 |       "doc": "Time the monitoring system started counting messages"
39 |     },
40 |     {
41 |       "name": "host_info",
42 |       "type": "string",
43 |       "doc": "Host information of client"
44 |     }
45 |   ]
46 | }
47 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/models/schema.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline.tools.introspector.models.base import BaseIntrospectorModel
20 | from data_pipeline.tools.introspector.models.topic import IntrospectorTopic
21 | 
22 | 
23 | class IntrospectorSchema(BaseIntrospectorModel):
24 |     def __init__(self, schema_obj, include_topic_info=False):
25 |         super(IntrospectorSchema, self).__init__(
26 |             schema_obj
27 |         )
28 |         self._fields = [
29 |             'schema_id', 'base_schema_id', 'status',
30 |             'primary_keys', 'created_at', 'note', 'schema_json'
31 |         ]
32 |         if include_topic_info:
33 |             self._fields.append('topic')
34 |             self.topic = IntrospectorTopic(self.topic).to_ordered_dict()
35 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/register_command_parser.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline.tools.introspector.register.avro_command import RegisterAvroCommand
20 | from data_pipeline.tools.introspector.register.mysql_command import RegisterMysqlCommand
21 | 
22 | 
23 | class RegisterCommandParser(object):
24 | 
25 |     @classmethod
26 |     def add_parser(cls, subparsers):
27 |         register_command_parser = subparsers.add_parser(
28 |             "register",
29 |             description="Register a given schema to the schematizer."
30 |         )
31 | 
32 |         register_command_subparsers = register_command_parser.add_subparsers()
33 |         RegisterAvroCommand.add_parser(register_command_subparsers)
34 |         RegisterMysqlCommand.add_parser(register_command_subparsers)
35 | 


--------------------------------------------------------------------------------
/data_pipeline/initialization_vector.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import os
20 | 
21 | from Crypto.Cipher import AES
22 | 
23 | from data_pipeline.meta_attribute import MetaAttribute
24 | 
25 | 
26 | def get_initialization_vector(schema_id, initialization_vector_array=None):
27 |     if initialization_vector_array is None:
28 |         initialization_vector_array = os.urandom(AES.block_size)
29 |     _verify_initialization_vector_params(initialization_vector_array)
30 |     return MetaAttribute(
31 |         schema_id=schema_id,
32 |         payload_data=initialization_vector_array
33 |     )
34 | 
35 | 
36 | def _verify_initialization_vector_params(vector_array):
37 |     if not isinstance(vector_array, bytes) or not len(vector_array) == 16:
38 |         raise TypeError('Initialization Vector must be a 16-byte array')
39 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Image for developing and testing the data_pipeline clientlib.
 2 | FROM ubuntu:14.04.1
 3 | MAINTAINER justinc@yelp.com
 4 | 
 5 | # Bring the base system up to date, then install wget and the English
 6 | # language pack.
 7 | run apt-get update && apt-get upgrade -y
 8 | run apt-get install -y wget language-pack-en-base
 9 | 
10 | # Generate the en_US locales and reconfigure the locales package.
11 | run locale-gen en_US en_US.UTF-8 && dpkg-reconfigure locales
12 | 
13 | run mkdir /src
14 | 
15 | workdir /src
16 | 
17 | # Download PyPy 5.1.1 and unpack it under /src.
18 | run wget https://bitbucket.org/pypy/pypy/downloads/pypy-5.1.1-linux64.tar.bz2
19 | run bunzip2 pypy-5.1.1-linux64.tar.bz2
20 | run tar xvf pypy-5.1.1-linux64.tar
21 | 
22 | # Put the unpacked PyPy binaries on PATH.
23 | ENV PATH $PATH:/src/pypy-5.1.1-linux64/bin/
24 | 
25 | # Install pip by running its bootstrap script with PyPy.
26 | run wget https://bootstrap.pypa.io/get-pip.py
27 | run pypy get-pip.py
28 | 
29 | # Build toolchain and development packages.
30 | # NOTE(review): on Ubuntu 14.04 the `docker` apt package is presumably not the
31 | # Docker engine (packaged there as `docker.io`) - confirm which one is wanted.
32 | run apt-get update && apt-get install -y build-essential git vim libpq5 libpq-dev docker \
33 |     libmysqlclient-dev libsnappy-dev
34 | 
35 | 
36 | # Make gcc reachable under the name `cc`.
37 | run ln -s /usr/bin/gcc /usr/local/bin/cc
38 | 
39 | run pip install virtualenv tox
40 | 
41 | # Setup clientlib: only the packaging and requirements files are copied
42 | # here; the complete tree is added at the end of this file.
43 | WORKDIR /data_pipeline
44 | add requirements.d/dev.txt /data_pipeline/requirements.d/dev.txt
45 | add requirements.d/tools.txt /data_pipeline/requirements.d/tools.txt
46 | add requirements.txt /data_pipeline/requirements.txt
47 | add setup.py /data_pipeline/setup.py
48 | add data_pipeline/__init__.py /data_pipeline/data_pipeline/__init__.py
49 | add README.rst /data_pipeline/README.rst
50 | add HISTORY.rst /data_pipeline/HISTORY.rst
51 | add bin/ /data_pipeline/bin
52 | 
53 | # Install dependencies into a virtualenv kept outside of /data_pipeline.
54 | run mkdir /dp_reqs
55 | run virtualenv /dp_reqs/venv
56 | run /dp_reqs/venv/bin/pip install -i https://pypi.yelpcorp.com/simple/ -r /data_pipeline/requirements.d/dev.txt
57 | 
58 | # Add the complete source tree.
59 | ADD . /data_pipeline
60 | 
61 | VOLUME ["/data_pipeline"]
49 | 


--------------------------------------------------------------------------------
/tests/tools/schema_ref_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | 
20 | class TestSchemaRef(object):
21 | 
22 |     def test_source_to_ref_map_is_complete(
23 |         self,
24 |         schema_ref,
25 |         good_source_ref,
26 |         bad_source_ref,
27 |         source
28 |     ):
29 |         assert schema_ref.get_source_ref(source) == good_source_ref
30 |         assert schema_ref.get_source_ref('bad_source') == bad_source_ref
31 |         assert len(schema_ref._source_to_ref_map) == 2
32 | 
33 |     def test_source_to_ref_map_can_be_empty(self, schema_ref):
34 |         schema_ref.schema_ref = {}
35 |         assert len(schema_ref._source_to_ref_map) == 0
36 | 
37 |     def test_defaults_are_respected(self, schema_ref, schema_ref_defaults):
38 |         for key, val in schema_ref_defaults.items():
39 |             assert schema_ref.get_source_val('bad_source', key) == val
40 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/info_command_parser.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline.tools.introspector.info.namespace import NamespaceInfoCommand
20 | from data_pipeline.tools.introspector.info.source import SourceInfoCommand
21 | from data_pipeline.tools.introspector.info.topic import TopicInfoCommand
22 | 
23 | 
24 | class InfoCommandParser(object):
25 | 
26 |     @classmethod
27 |     def add_parser(cls, subparsers):
28 |         info_command_parser = subparsers.add_parser(
29 |             "info",
30 |             description="Get information on a specific data pipeline item."
31 |         )
32 | 
33 |         info_command_subparsers = info_command_parser.add_subparsers()
34 |         TopicInfoCommand.add_parser(info_command_subparsers)
35 |         SourceInfoCommand.add_parser(info_command_subparsers)
36 |         NamespaceInfoCommand.add_parser(info_command_subparsers)
37 | 


--------------------------------------------------------------------------------
/data_pipeline/schemas/registration_message_v1.avsc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "type": "record",
 3 |   "namespace": "yelp.data_pipeline",
 4 |   "name": "client_registration_message_v1",
 5 |   "doc": "Message format for Producer/Consumer registration.",
 6 |   "fields": [
 7 |     {
 8 |       "name": "team_name",
 9 |       "type": "string",
10 |       "doc": "Team name, as defined in `sensu_handlers::teams` (see y/sensu-teams)"
11 |     },
12 |     {
13 |       "name": "client_name",
14 |       "type": "string",
15 |       "doc": "Name associated with the client - this name will be used as the client's identifier for producer/consumer registration."
16 |     },
17 |     {
18 |       "name": "client_type",
19 |       "type": {
20 |         "name": "client_type",
21 |         "doc": "ENUM of Client Types",
22 |         "type": "enum",
23 |         "symbols": [
24 |           "producer",
25 |           "consumer"
26 |         ]
27 |       },
28 |       "doc": "identifies the type of client: a 'consumer' or a 'producer'"
29 |     },
30 |     {
31 |       "name": "timestamp",
32 |       "type": ["null", "long"],
33 |       "logicalType": "timestamp-millis",
34 |       "doc": "The most recent time that the Client read/wrote a message with the schema_id"
35 |     },
36 |     {
37 |       "name": "expected_frequency_seconds",
38 |       "type": "int",
39 |       "doc": "How often (seconds) on average the client publishes/receives a message to/from the data pipeline"
40 |     },
41 |     {
42 |       "name": "schema_id",
43 |       "type": "int",
44 |       "doc": "Schema Id for which this message contains information about last time used"
45 |     }
46 |   ]
47 | }


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/models/source.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline.tools.introspector.models.base import BaseIntrospectorModel
20 | 
21 | 
22 | class IntrospectorSource(BaseIntrospectorModel):
23 |     def __init__(self, source_obj, active_sources=None):
24 |         super(IntrospectorSource, self).__init__(
25 |             source_obj
26 |         )
27 |         self._fields = [
28 |             'name', 'source_id', 'owner_email', 'namespace'
29 |         ]
30 |         self.namespace = source_obj.namespace.name
31 |         # Need to check for none in case of empty list
32 |         if active_sources is not None:
33 |             self._fields.append('active_topic_count')
34 |             active_source = active_sources.get(source_obj.source_id, None)
35 |             self.active_topic_count = 0 if (
36 |                 not active_source
37 |             ) else active_source['active_topic_count']
38 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/list_command_parser.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline.tools.introspector.list_command.namespaces import NamespacesListCommand
20 | from data_pipeline.tools.introspector.list_command.sources import SourcesListCommand
21 | from data_pipeline.tools.introspector.list_command.topics import TopicsListCommand
22 | 
23 | 
24 | class ListCommandParser(object):
25 | 
26 |     @classmethod
27 |     def add_parser(cls, subparsers):
28 |         list_command_parser = subparsers.add_parser(
29 |             "list",
30 |             description="Get a list of specified items as a JSON array of objects."
31 |         )
32 | 
33 |         list_command_subparsers = list_command_parser.add_subparsers()
34 |         TopicsListCommand.add_parser(list_command_subparsers)
35 |         SourcesListCommand.add_parser(list_command_subparsers)
36 |         NamespacesListCommand.add_parser(list_command_subparsers)
37 | 


--------------------------------------------------------------------------------
/tests/benchmarks/envelope_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import pytest
20 | 
21 | from data_pipeline.envelope import Envelope
22 | from tests.factories.base_factory import MessageFactory
23 | 
24 | 
25 | @pytest.mark.usefixtures(
26 |     "config_benchmark_containers_connections"
27 | )
28 | @pytest.mark.benchmark
29 | class TestBenchEnvelope(object):
30 | 
31 |     @pytest.fixture
32 |     def envelope(self):
33 |         return Envelope()
34 | 
35 |     def test_pack(self, benchmark, envelope):
36 | 
37 |         def setup():
38 |             return [MessageFactory.create_message_with_payload_data()], {}
39 | 
40 |         benchmark.pedantic(envelope.pack, setup=setup, rounds=1000)
41 | 
42 |     def test_unpack(self, benchmark, envelope):
43 | 
44 |         def setup():
45 |             return [envelope.pack(MessageFactory.create_message_with_payload_data())], {}
46 | 
47 |         benchmark.pedantic(envelope.unpack, setup=setup, rounds=1000)
48 | 


--------------------------------------------------------------------------------
/data_pipeline/helpers/decorators.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import cPickle
20 | from functools import wraps
21 | 
22 | 
23 | def memoized(func):
24 |     """Decorator that caches a function's return value each time it is called.
25 |     If called later with the same arguments, the cached value is returned, and
26 |     the function is not re-evaluated.
27 | 
28 |     Based upon from http://wiki.python.org/moin/PythonDecoratorLibrary#Memoize
29 |     Nota bene: this decorator memoizes /all/ calls to the function.
30 |     For a memoization decorator with limited cache size, consider:
31 |     http://code.activestate.com/recipes/496879-memoize-decorator-function-with-cache-size-limit/
32 |     """
33 |     cache = {}
34 | 
35 |     @wraps(func)
36 |     def func_wrapper(*args, **kwargs):
37 |         key = cPickle.dumps((args, kwargs))
38 |         if key not in cache:
39 |             cache[key] = func(*args, **kwargs)
40 |         return cache[key]
41 |     return func_wrapper
42 | 


--------------------------------------------------------------------------------
/tests/helpers/config.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from contextlib import contextmanager
20 | 
21 | import staticconf
22 | 
23 | from data_pipeline.config import configure_from_dict
24 | from data_pipeline.config import namespace
25 | 
26 | 
27 | @contextmanager
28 | def reconfigure(**kwargs):
29 |     """Reconfigures the given kwargs, restoring the current configuration for
30 |     only those kwargs when the contextmanager exits.
31 |     """
32 |     conf_namespace = staticconf.config.get_namespace(namespace)
33 |     starting_config = {
34 |         k: v for k, v in conf_namespace.get_config_values().iteritems()
35 |         if k in kwargs
36 |     }
37 |     configure_from_dict(kwargs)
38 |     try:
39 |         yield
40 |     finally:
41 |         final_config = {
42 |             k: v for k, v in conf_namespace.get_config_values().iteritems()
43 |             if k not in kwargs
44 |         }
45 |         final_config.update(starting_config)
46 |         staticconf.config.get_namespace(namespace).clear()
47 |         configure_from_dict(final_config)
48 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/models/namespace.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline.tools.introspector.models.base import BaseIntrospectorModel
20 | 
21 | 
22 | class IntrospectorNamespace(BaseIntrospectorModel):
23 |     def __init__(self, namespace_obj, active_namespaces=None):
24 |         super(IntrospectorNamespace, self).__init__(
25 |             namespace_obj
26 |         )
27 |         self._fields = [
28 |             'name', 'namespace_id'
29 |         ]
30 |         # Need to check for none in case of empty list
31 |         if active_namespaces is not None:
32 |             self._fields.append('active_source_count')
33 |             self._fields.append('active_topic_count')
34 |             self.active_source_count = 0
35 |             self.active_topic_count = 0
36 |             active_namespace = active_namespaces.get(self.name, None)
37 |             if active_namespace:
38 |                 self.active_source_count = active_namespace['active_source_count']
39 |                 self.active_topic_count = active_namespace['active_topic_count']
40 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/models/namespace.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from collections import namedtuple
20 | 
21 | from data_pipeline.schematizer_clientlib.models.model_base import BaseModel
22 | 
23 | 
24 | """
25 | Represent the data of a namespace.  Namespace is a group which the avro schemas
26 | belong to.  It is the highest grouping level of schemas.  For example,
27 | `yelp_main` could be a namespace.
28 | 
29 | Args:
30 |     namespace_id (int): The id of the namespace.
31 |     name (str): The name of the namespace.
32 | """
33 | Namespace = namedtuple('Namespace', ['namespace_id', 'name'])
34 | 
35 | 
36 | class _Namespace(BaseModel):
37 | 
38 |     def __init__(self, namespace_id, name):
39 |         self.namespace_id = namespace_id
40 |         self.name = name
41 | 
42 |     @classmethod
43 |     def from_response(cls, response):
44 |         return cls(
45 |             namespace_id=response.namespace_id,
46 |             name=response.name
47 |         )
48 | 
49 |     def to_result(self):
50 |         return Namespace(
51 |             namespace_id=self.namespace_id,
52 |             name=self.name
53 |         )
54 | 


--------------------------------------------------------------------------------
/tests/tools/meteorite_wrappers_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import mock
20 | 
21 | from data_pipeline.tools.meteorite_wrappers import StatsCounter
22 | 
23 | 
24 | class TestStatsCounter(object):
25 | 
26 |     @mock.patch('yelp_meteorite.metrics.Counter.count', autospec=True)
27 |     def test_stats_counter(self, mock_count):
28 |         counter = StatsCounter('test_stat', message_count_timer=0, stat_type='test_type')
29 |         counter.increment('test_type')
30 |         assert mock_count.call_count == 1
31 | 
32 |     @mock.patch('yelp_meteorite.metrics.Counter.count', autospec=True)
33 |     def test_batched_counter(self, mock_count):
34 |         with mock.patch(
35 |             'data_pipeline.tools.meteorite_wrappers.time',
36 |         ) as mock_time:
37 |             mock_time.time.side_effect = [
38 |                 2, 3, 8, 9
39 |             ]
40 |             counter = StatsCounter('test_stat', message_count_timer=4, stat_type='test_type')
41 |             counter.increment('test_type')
42 |             counter.increment('test_type')
43 |             # Two increments are batched into 1 call.
44 |             assert mock_count.call_count == 1
45 | 


--------------------------------------------------------------------------------
/Makefile-opensource:
--------------------------------------------------------------------------------
 1 | # Version string read from the __version__ assignment in data_pipeline/__init__.py.
 2 | CURRENT_VERSION=$(strip $(shell sed -n -r "s/__version__ = '(.+)'/\1/p" $(CURDIR)/data_pipeline/__init__.py))
 3 | # Presumably CURRENT_VERSION with its last dot-separated component incremented by one.
 4 | NEXT_VERSION=$(shell echo $(CURRENT_VERSION) | awk -F. '/[0-9]+\./{$$NF+=1;OFS=".";print}')
 5 | 
 6 | # Extra arguments appended to the tox calls of `test` and `docs`; empty by default.
 7 | REBUILD_FLAG =
 8 | 
 9 | .PHONY: help all production clean clean-pyc clean-build clean-docs clean-vim lint test docs coverage install-hooks compose-prefix
10 | 
11 | # Print a short description of the main targets.
12 | help:
13 | 	@echo "clean-build - remove build artifacts"
14 | 	@echo "clean-pyc - remove Python file artifacts"
15 | 	@echo "clean-docs - remove doc creation artifacts"
16 | 	@echo "clean-vim - remove vim swap file artifacts"
17 | 	@echo "test - run tests quickly with the default Python"
18 | 	@echo "coverage - check code coverage"
19 | 	@echo "docs - generates Sphinx HTML documentation, including API docs"
20 | 	@echo "compose-prefix - generates a preconfigured docker-compose command"
21 | 
22 | all: production install-hooks
23 | 
24 | # No-op target: its recipe only runs `true`.
25 | production:
26 | 	@true
27 | 
28 | # clean-vim is not a prerequisite of `clean` and has to be requested explicitly.
29 | clean: clean-build clean-pyc clean-docs
30 | 
31 | clean-build:
32 | 	rm -fr build/
33 | 	rm -fr dist/
34 | 	rm -fr *.egg-info
35 | 
36 | clean-pyc:
37 | 	find . -name '*.pyc' -exec rm -f {} +
38 | 	find . -name '*.pyo' -exec rm -f {} +
39 | 	find . -name '*~' -exec rm -f {} +
40 | 
41 | clean-docs:
42 | 	rm -rf docs/build/*
43 | 	rm -rf docs/code/*
44 | 
45 | clean-vim:
46 | 	find . -name '*.swp' -exec rm -f {} +
47 | 	find . -name '*.swo' -exec rm -f {} +
48 | 
49 | # Run tox with tox-opensource.ini, with the open-source and container variables set.
50 | test:
51 | 	OPEN_SOURCE_MODE=true PULL_CONTAINERS=true FORCE_FRESH_CONTAINERS=true tox -c tox-opensource.ini $(REBUILD_FLAG)
52 | 
53 | # Remove old documentation artifacts, then run the tox `docs` environment.
54 | docs: clean-docs 
55 | 	tox -c tox-opensource.ini -e docs $(REBUILD_FLAG)
56 | 
57 | # Alias of `test`; it adds no commands of its own.
58 | coverage: test
59 | 
60 | # Pass `install -f --install-hooks` to the tox `pre-commit` environment.
61 | install-hooks:
62 | 	tox -c tox-opensource.ini -e pre-commit -- install -f --install-hooks
63 | 
64 | # Print the value of Containers.compose_prefix() (uses the Python 2 print statement).
65 | compose-prefix:
66 | 	@OPEN_SOURCE_MODE=true python -c "from data_pipeline.testing_helpers.containers import Containers; print Containers.compose_prefix()"
56 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/models/base.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import datetime
20 | from collections import OrderedDict
21 | 
22 | 
class BaseIntrospectorModel(object):
    """Base class for introspector models that mirror a ``_fields`` object.

    The constructor copies every field named in ``model_obj._fields`` onto
    the new instance, except for the fields listed in ``excluded_fields``.

    Args:
        model_obj: object exposing ``_fields`` (e.g. a namedtuple) whose
            attributes should be copied.
        excluded_fields (Optional[list]): names of fields to leave out.
    """

    def __init__(self, model_obj, excluded_fields=None):
        skipped = excluded_fields or []
        for name in model_obj._fields:
            if name in skipped:
                continue
            attr = getattr(model_obj, name)
            if isinstance(attr, datetime.datetime):
                # datetime objects are not json serializable
                attr = str(attr)
            setattr(self, name, attr)

    def to_ordered_dict(self):
        """Return the model as an OrderedDict keyed in ``self._fields`` order.

        Raises:
            NotImplementedError: if the instance has no ``_fields`` attribute
                defining which fields to emit and in which order.
        """
        if not hasattr(self, '_fields'):
            raise NotImplementedError(
                "Derived class does not have a defined _fields "
                "attribute to define order of fields for dict"
            )
        return OrderedDict(
            (name, getattr(self, name)) for name in self._fields
        )
47 | 


--------------------------------------------------------------------------------
/tox-opensource.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | # Removed for now:
 3 | # py34, py35, pypy
 4 | envlist = py27, docs
 5 | skipsdist = true
 6 | 
 7 | [testenv]
 8 | basepython = python2.7
 9 | passenv = *
10 | envdir = venv/py27
11 | venv_update = {toxinidir}/bin/venv-update venv= {envdir} install= 
12 | commands =
13 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt
14 |     py.test -c tox-opensource.ini --cov=data_pipeline --maxfail=3 tests/
15 |     pre-commit run --all-files
16 | 
17 | [testenv:pre-commit]
18 | envdir = venv/pre_commit
19 | commands = 
20 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/pre_commit.txt
21 | 	pre-commit {posargs}
22 | 
23 | [testenv:guard]
24 | envlist = py27
25 | envdir = venv/py27
26 | commands =
27 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt
28 |     py.test -c tox-opensource.ini --doctest-modules -m "not pending" {posargs}
29 | 
30 | [testenv:docs]
31 | envdir = venv/py27
32 | commands =
33 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt
34 |     sphinx-apidoc -f -e -o docs/code data_pipeline
35 |     sphinx-build -b html -d docs/build/doctrees docs/ docs/build/html
36 | 
37 | [testenv:devenv]
38 | envdir = venv/py27
39 | commands =
40 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt
41 | 
42 | [testenv:devenv-command]
43 | envdir = venv/py27
44 | commands = 
45 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt
46 | 	{posargs}
47 | 
48 | [flake8]
49 | ignore =
50 | exclude = .git,.tox,docs,virtualenv_run,venv,__pycache__,.ropeproject,debian,dist
51 | filename = *.py,*.wsgi
52 | max-line-length = 131
53 | 
[pytest]
norecursedirs = tests/consumer tests/tools tests/benchmarks
# pytest only honours the last -m expression it is given, so both marker
# filters are combined into a single expression.
addopts = -m "not benchmark and not skip" --ignore=setup.py --doctest-glob=*.rst -vv
57 | 
58 | [pep8]
59 | # E265 deals with spacing inside of comments - breaks human formatting
60 | # E309 puts a blank line after class declarations - doesn't work well with docstrings
61 | # E501 reformats lines to fit in --max-line-length poorly
62 | ignore = E265,E309,E501
63 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/main.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import argparse
20 | 
21 | from data_pipeline import __version__
22 | from data_pipeline.tools.introspector.info_command_parser import InfoCommandParser
23 | from data_pipeline.tools.introspector.list_command_parser import ListCommandParser
24 | from data_pipeline.tools.introspector.register_command_parser import RegisterCommandParser
25 | from data_pipeline.tools.introspector.schema_check_command import SchemaCheckCommand
26 | 
27 | 
def parse_args():
    """Build the introspector argument parser and parse the command line.

    A ``--version`` flag is registered on the top-level parser; the list,
    info, register and schema-check commands then each attach themselves to
    the subparsers through their ``add_parser`` hook.

    Returns:
        argparse.Namespace: the parsed command line arguments
    """
    description = (
        "data_pipeline_introspector provides ability to view the current "
        "state of the data pipeline from a top-down view of namespaces."
    )
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '--version',
        action='version',
        version="data_pipeline {}".format(__version__)
    )

    subparsers = parser.add_subparsers()
    # Registration order is kept as-is since it drives the help output order
    command_parsers = (
        ListCommandParser,
        InfoCommandParser,
        RegisterCommandParser,
        SchemaCheckCommand,
    )
    for command_parser in command_parsers:
        command_parser.add_parser(subparsers)
    return parser.parse_args()
45 | 
46 | 
def run():
    """Parse the command line and invoke ``args.command`` with the arguments."""
    parsed_args = parse_args()
    # NOTE(review): ``command`` is presumably set as a default by whichever
    # subcommand parser matched -- confirm in the command parser classes.
    handler = parsed_args.command
    handler(parsed_args)


if __name__ == "__main__":
    run()
54 | 


--------------------------------------------------------------------------------
/tests/factories/base_factory.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from random import randint
20 | 
21 | from data_pipeline.message import CreateMessage
22 | from data_pipeline.schematizer_clientlib.schematizer import get_schematizer
23 | 
24 | 
class SchemaFactory(object):
    """Test factory for a minimal one-field avro record schema and payloads
    that match it.
    """

    SOURCE_SCHEMA = '''
    {
        "type": "record",
        "namespace": "test_namespace",
        "doc": "test_doc",
        "name": "source_schema",
        "fields": [
            {"type": "int","name": "original", "doc": "test_doc"}
        ]
    }
    '''

    @classmethod
    def get_schema_json(cls):
        """Register ``SOURCE_SCHEMA`` through the schematizer client.

        The source name gets a random numeric suffix between 0 and 100.

        Returns:
            the value returned by the schematizer's ``register_schema``
        """
        schematizer = get_schematizer()
        source_name = "test_source_{}".format(randint(0, 100))
        return schematizer.register_schema(
            schema_str=cls.SOURCE_SCHEMA,
            namespace='test_namespace',
            source=source_name,
            source_owner_email='test@yelp.com',
            contains_pii=False
        )

    @classmethod
    def get_payload_data(cls):
        """Return a payload dict with a random int for the ``original`` field.

        Returns:
            dict: ``{"original": <int between 0 and 1000000>}``
        """
        payload = {"original": randint(0, 1000000)}
        return payload
51 | 
52 | 
class MessageFactory(object):
    """Test factory producing ``CreateMessage`` instances."""

    @classmethod
    def create_message_with_payload_data(cls):
        """Build a ``CreateMessage`` backed by a schema from ``SchemaFactory``.

        The schema id comes from ``SchemaFactory.get_schema_json()`` and the
        payload from ``SchemaFactory.get_payload_data()``, so every call goes
        through the schematizer client.

        Returns:
            CreateMessage: message carrying the generated payload data
        """
        return CreateMessage(
            schema_id=SchemaFactory.get_schema_json().schema_id,
            payload_data=SchemaFactory.get_payload_data()
        )
61 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/sensu_ttl_alerter.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import copy
20 | 
21 | import pysensu_yelp
22 | 
23 | 
class SensuTTLAlerter(object):
    """ This class triggers a sensu alert if the producer dies.  If the
    sensu_event_info is not sent again within the ttl specified the prior
    time it was sent, then sensu will alert.

    Args:
        sensu_event_info(dict): dictionary with sensu parameters, passed as
            keyword arguments to ``pysensu_yelp.send_event``.  For details see
            http://pysensu-yelp.readthedocs.io/en/latest/index.html?highlight=send_event
        enable(bool): enable this ttl alert manager
    """

    def __init__(self, sensu_event_info, enable=True):
        self._sensu_event_info = sensu_event_info
        self._enable = enable

    def process(self):
        """Send the sensu event if the alerter is enabled; otherwise do
        nothing.
        """
        if self.enable:
            pysensu_yelp.send_event(**self._sensu_event_info)

    @property
    def enable(self):
        """bool: whether events are currently being sent."""
        return self._enable

    @enable.setter
    def enable(self, new_enable_value):
        """Enable or disable the alerter.

        When switching from enabled to disabled, one last event is sent
        without a ttl.
        """
        if self._enable and not new_enable_value:
            # send final message without ttl; work on a copy so the stored
            # event info keeps its ttl in case the alerter is re-enabled
            final_sensu_info = copy.deepcopy(self._sensu_event_info)
            # the event info may have been configured without a ttl, in
            # which case there is nothing to strip
            final_sensu_info.pop('ttl', None)
            pysensu_yelp.send_event(**final_sensu_info)
        self._enable = new_enable_value
55 | 


--------------------------------------------------------------------------------
/tests/initialization_vector_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import pytest
20 | 
21 | from data_pipeline._encryption_helper import _AVSCStore
22 | from data_pipeline._encryption_helper import initialization_vector_info
23 | from data_pipeline.initialization_vector import get_initialization_vector
24 | from data_pipeline.meta_attribute import MetaAttribute
25 | 
26 | 
@pytest.mark.usefixtures('containers')
class TestInitializationVector(object):
    """Tests for ``get_initialization_vector``."""

    def test_create_vector_fails_with_bad_arg_values(self):
        """An invalid vector payload must be rejected with a TypeError."""
        vector_schema_id = _AVSCStore().get_schema_id(
            initialization_vector_info
        )
        bad_payload = bytes(10)

        with pytest.raises(TypeError):
            get_initialization_vector(vector_schema_id, bad_payload)

    def test_initialization_vector_creation(self):
        """Both an explicit 16-byte payload and ``None`` yield a
        MetaAttribute whose avro payload is bytes.
        """
        vector_schema_id = _AVSCStore().get_schema_id(
            initialization_vector_info
        )

        for payload in (b'0000000000000000', None):
            vector = get_initialization_vector(vector_schema_id, payload)
            assert isinstance(vector, MetaAttribute)
            assert isinstance(vector.avro_repr['payload'], bytes)
51 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/models/meta_attr_source_mapping.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from collections import namedtuple
20 | 
21 | from data_pipeline.schematizer_clientlib.models.model_base import BaseModel
22 | 
23 | 
# Represent the data of meta attribute mapping store. Meta attribute source
# mapping should represent a mapping of a source and the corresponding meta
# attribute schema id.
MetaAttributeSourceMapping = namedtuple(
    'MetaAttributeSourceMapping',
    'source_id meta_attribute_schema_id'
)
33 | 
34 | 
class _MetaAttributeSourceMapping(BaseModel):
    """Internal counterpart of ``MetaAttributeSourceMapping``, used to
    convert from/to various data structures and to build the value returned
    by schematizer functions.
    """

    def __init__(self, source_id, meta_attribute_schema_id):
        self.source_id = source_id
        self.meta_attribute_schema_id = meta_attribute_schema_id

    @classmethod
    def from_response(cls, source_id, meta_attribute_schema_id):
        """Create the internal model from a source id and the id of the meta
        attribute schema mapped to it.
        """
        mapping = cls(
            source_id=source_id,
            meta_attribute_schema_id=meta_attribute_schema_id
        )
        return mapping

    def to_result(self):
        """Return the public ``MetaAttributeSourceMapping`` namedtuple holding
        this model's two ids.
        """
        return MetaAttributeSourceMapping(
            self.source_id,
            self.meta_attribute_schema_id
        )
56 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/models/meta_attr_namespace_mapping.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from collections import namedtuple
20 | 
21 | from data_pipeline.schematizer_clientlib.models.model_base import BaseModel
22 | 
23 | 
# Represent the data of meta attribute mapping store. Meta attribute namespace
# mapping should represent a mapping of a namespace and the corresponding meta
# attribute schema id.
MetaAttributeNamespaceMapping = namedtuple(
    'MetaAttributeNamespaceMapping',
    'namespace_id meta_attribute_schema_id'
)
33 | 
34 | 
class _MetaAttributeNamespaceMapping(BaseModel):
    """Internal counterpart of ``MetaAttributeNamespaceMapping``, used to
    convert from/to various data structures and to build the value returned
    by schematizer functions.
    """

    def __init__(self, namespace_id, meta_attribute_schema_id):
        self.namespace_id = namespace_id
        self.meta_attribute_schema_id = meta_attribute_schema_id

    @classmethod
    def from_response(cls, namespace_id, meta_attribute_schema_id):
        """Create the internal model from a namespace id and the id of the
        meta attribute schema mapped to it.
        """
        mapping = cls(
            namespace_id=namespace_id,
            meta_attribute_schema_id=meta_attribute_schema_id
        )
        return mapping

    def to_result(self):
        """Return the public ``MetaAttributeNamespaceMapping`` namedtuple
        holding this model's two ids.
        """
        return MetaAttributeNamespaceMapping(
            self.namespace_id,
            self.meta_attribute_schema_id
        )
56 | 


--------------------------------------------------------------------------------
/data_pipeline/team.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import yaml
20 | 
21 | from data_pipeline.config import get_config
22 | 
23 | 
class Team(object):
    """Contains some helper methods for dealing with the data pipeline team
    configuration.  Eventually this class should encapsulate information about
    each team.  That's overkill for the current use-case, so it's not
    implemented yet.

    """

    @classmethod
    def config(cls):
        """Loads and decodes the
        :attr:`data_pipeline.config.Config.data_pipeline_teams_config_file_path`.

        The file is re-read on every call.

        TODO(justinc|DATAPIPE-348): Cache team config, dealing with invalidation
        when configuration changes.

        Returns:
            dict: team configuration
        """
        config_path = get_config().data_pipeline_teams_config_file_path
        # Use a context manager so the file handle is closed right away
        # rather than whenever the garbage collector gets to it.
        with open(config_path) as config_file:
            # NOTE(review): yaml.load can construct arbitrary Python objects.
            # The teams file is presumably trusted, local configuration; if
            # it only holds plain mappings, yaml.safe_load would be the safer
            # choice -- confirm before switching.
            return yaml.load(config_file.read())

    @classmethod
    def team_names(cls):
        """Lists all data pipeline teams

        Returns:
            list of str: all valid data pipeline team names
        """
        # list() keeps the documented return type on interpreters where
        # dict.keys() is a view rather than a list.
        return list(cls.config()['teams'])

    @classmethod
    def exists(cls, team_name):
        """Determines if a team exists, by name.

        Args:
            team_name (str): name of the team to look up

        Returns:
            bool: True if team_name exists for a valid team, false otherwise
        """
        return team_name in cls.team_names()
63 | 


--------------------------------------------------------------------------------
/Guardfile:
--------------------------------------------------------------------------------
 1 | # More info at https://github.com/guard/guard#readme
 2 | 
 3 | module ::Guard
 4 |   class MakeGuard < Plugin
 5 |     def make_docs(paths=nil)
 6 |       UI.info 'Rebuilding docs...'
 7 |       out = `make docs 2>&1`
 8 |       if $?.exitstatus == 0
 9 |         UI.info 'Docs built'
10 |       else
11 |         UI.error '`make docs` exited with non-zero status'
12 |         UI.debug out
13 |         throw :task_has_failed
14 |       end
15 |     end
16 | 
17 |     [:run_all, :run_on_additions, :run_on_modifications, :run_on_removals].each do |method|
18 |         alias_method method, :make_docs
19 |     end
20 |   end
21 | 
22 |   # Watchers should return :all to test all files, or the path of files to test
23 |   class PyTest < Plugin
24 |     def run_tests(test_files=[])
25 |       test_files = test_files.select{|f| File.exists?(f)}
26 |       UI.info "Running test files: #{test_files.join(', ')}"
27 |       cols = ENV['COLUMNS'].to_i - 10
28 |       out = `COLUMNS=#{cols} tox -e guard "#{test_files.join(' ')}"`
29 |       UI.info "Test Output:\n#{out}"
30 |     end
31 | 
32 |     def run_all(paths=nil)
33 |       run_tests
34 |     end
35 |     alias_method :run_on_removals, :run_all
36 | 
37 |     def run_on_additions(paths)
38 |       if paths == ['all']
39 |         run_all
40 |       else
41 |         run_tests(paths)
42 |       end
43 |     end
44 |     alias_method :run_on_modifications, :run_on_additions
45 |   end
46 | end
47 | 
guard :pytest do
  # Test the changed file and the corresponding test
  watch(%r{^data_pipeline/(.+)\.py$}) {|m| [m[0], "tests/#{m[1]}_test.py"]  }
  watch(%r{^data_pipeline/(async_producer|position_data|_kafka_producer|_position_data_builder)\.py$}) { 'tests/producer_test.py' }
  # Dots before the py extension are escaped so that only real .py files match
  watch(%r{^tests/(.+)_test\.py$})
  watch(%r{^tests/helpers/(.+)\.py$}) { :all }
  watch(%r{^tests/conftest\.py$}) { :all }
  watch('tox.ini') { :all }
  # rst files in root and docs directories only
  watch(%r{^([^/]+|docs/[^/]+)\.rst$})
end
59 | 
# Files watched by the make_guard plugin: library python files, rst docs,
# the sphinx configuration and tox.ini.
guard :make_guard do
  watch(%r{^data_pipeline/(.+)\.py$})
  # rst files in root and docs directories only
  watch(%r{^([^/]+|docs/[^/]+)\.rst$})
  watch('docs/conf.py')
  watch('tox.ini')
end
67 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/models/topic.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline.tools.introspector.models.base import BaseIntrospectorModel
20 | 
21 | 
class IntrospectorTopic(BaseIntrospectorModel):
    """Introspector view of a topic, flattened for ordered dict output.

    Args:
        topic_obj: topic model; its ``source`` field is not copied as-is but
            flattened into ``source_name``, ``source_id`` and ``namespace``.
        kafka_topics (Optional[collection]): when given, an ``in_kafka``
            field records whether the topic name is contained in it.
        topics_to_range_map (Optional[dict]): when given, a ``message_count``
            field holds the summed values stored under the topic name.
    """

    def __init__(self, topic_obj, kafka_topics=None, topics_to_range_map=None):
        super(IntrospectorTopic, self).__init__(
            topic_obj,
            excluded_fields=['source']
        )
        fields = self._fields = [
            'name', 'topic_id', 'source_name', 'source_id', 'namespace',
            'primary_keys', 'contains_pii', 'cluster_type'
        ]
        source = topic_obj.source
        self.source_name = source.name
        self.source_id = source.source_id
        self.namespace = source.namespace.name

        if kafka_topics is not None:
            self.in_kafka = self.name in kafka_topics
            fields.append('in_kafka')

        if topics_to_range_map is not None:
            self.message_count = self._get_topic_message_count(
                topics_to_range_map
            )
            # A topic with messages is marked as being in kafka
            if self.message_count:
                self.in_kafka = True
            fields.append('message_count')

        fields.extend(['created_at', 'updated_at'])

    def _get_topic_message_count(self, topics_to_range_map):
        """Return the sum of the values stored for this topic in
        ``topics_to_range_map``, or 0 when the topic has no entry.
        """
        if self.name not in topics_to_range_map:
            return 0
        return sum(topics_to_range_map[self.name].values())
51 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/register/base_command.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import simplejson
20 | 
21 | from data_pipeline.tools.introspector.base_command import IntrospectorCommand
22 | from data_pipeline.tools.introspector.models import IntrospectorSchema
23 | 
24 | 
class _BaseRegisterCommand(IntrospectorCommand):
    """Shared argument handling and schema output for register commands."""

    @classmethod
    def add_base_arguments(cls, parser):
        """Add the source/namespace arguments plus the owner email and pii
        flag to ``parser``, on top of the parent class' base arguments.
        """
        super(_BaseRegisterCommand, cls).add_base_arguments(parser)
        cls.add_source_and_namespace_arguments(parser)

        parser.add_argument(
            "--source_owner_email",
            required=True,
            type=str,
            help="The email of the owner of the given source."
        )
        parser.add_argument(
            "--contains-pii",
            action="store_true",
            default=False,
            dest="pii",
            help="Flag indicating if schema contains pii. More info at y/pii"
        )

    def process_args(self, args, parser):
        """Store the parsed owner email and pii flag on the command, after
        the parent class and source/namespace processing have run.
        """
        super(_BaseRegisterCommand, self).process_args(args, parser)
        self.process_source_and_namespace_args(args, parser)
        self.pii = args.pii
        self.source_owner_email = args.source_owner_email

    def print_schema(self, schema):
        """Print ``schema`` (including topic info) as a JSON document."""
        introspector_schema = IntrospectorSchema(
            schema,
            include_topic_info=True
        )
        print(simplejson.dumps(introspector_schema.to_ordered_dict()))
59 | 


--------------------------------------------------------------------------------
/data_pipeline/expected_frequency.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from enum import Enum
20 | 
21 | 
class ExpectedFrequency(Enum):
    """Helper constants specifying how frequently the client expects to produce
    or consume messages.  Any positive integer number of seconds can be used,
    these are provided for convenience only.  Expected frequency will be used
    to infer schema deprecation.

    For example, if a client registers to produce messages constantly, and a
    few months later we observe that the client hasn't published messages using
    an older schema version in a month, but does regularly publish using a
    newer version, we can infer that the older schema version is deprecated
    and send out a deprecation/migration notice.

    Attributes:
      constantly: client expects to always and continuously be producing and
        consuming messages.
      hourly: client expects to come online to produce or consume messages
        approximately every hour.
      daily: client expects to come online to produce or consume messages about
        once a day.
      weekly: client expects to come online to produce or consume messages about
        once a week.
      monthly: client expects to come online to produce or consume messages about
        once a month.
      yearly: client expects to come online to produce or consume messages about
        once a year.
    """
    # All values are expressed in seconds; a month is taken as 30 days and a
    # year as 365 days.
    constantly = 0
    hourly = 60 * 60
    daily = hourly * 24
    weekly = daily * 7
    monthly = daily * 30
    yearly = daily * 365
52 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/meteorite_gauge_manager.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline.tools.heartbeat_periodic_processor import BasePeriodicProcessor
20 | from data_pipeline.tools.meteorite_wrappers import StatGauge
21 | 
22 | 
class MeteoriteGaugeManager(BasePeriodicProcessor):
    """
    This class reports how far behind real-time the producer is to
    meteorite/signalfx

    Args:
       interval_in_seconds(int): the time interval between two events.
       stats_gauge_name(str): name of the stats gauge
       container_name(str): paasta container name
       container_env(str): paasta cluster name
       disable(bool): whether this gauge is disabled or not
       kwargs(dict): any additional keyword args for the Meteorite StatGauge
           wrapper class
    """

    def __init__(
        self,
        interval_in_seconds,
        stats_gauge_name=None,
        container_name=None,
        container_env=None,
        disable=False,
        **kwargs
    ):
        super(MeteoriteGaugeManager, self).__init__(interval_in_seconds)
        gauge_kwargs = dict(
            kwargs,
            container_name=container_name,
            container_env=container_env
        )
        self.gauge = StatGauge(stats_gauge_name, **gauge_kwargs)
        self.disable = disable

    def process(self, timestamp):
        """Set the gauge to the number of seconds between ``timestamp`` and
        ``self._utc_now``; does nothing while the manager is disabled.
        """
        if not self.disable:
            lag = self._utc_now - timestamp
            self.gauge.set(lag.total_seconds())
60 | 


--------------------------------------------------------------------------------
/tests/benchmarks/producer_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import time
20 | 
21 | import mock
22 | import pytest
23 | 
24 | from data_pipeline.expected_frequency import ExpectedFrequency
25 | from data_pipeline.producer import Producer
26 | from tests.factories.base_factory import MessageFactory
27 | 
28 | 
@pytest.mark.usefixtures(
    "configure_teams",
    "config_benchmark_containers_connections"
)
@pytest.mark.benchmark
class TestBenchProducer(object):
    """Benchmark for publishing messages through a `Producer`."""

    @pytest.yield_fixture
    def patch_monitor_init_start_time_to_now(self):
        """Patch the monitor window start timestamp to the current epoch second."""
        now_in_seconds = int(time.time())
        start_time_patch = mock.patch(
            'data_pipeline.client._Monitor.get_monitor_window_start_timestamp',
            return_value=now_in_seconds
        )
        with start_time_patch as patched_start_time:
            yield patched_start_time

    @pytest.yield_fixture
    def dp_producer(self, team_name):
        """Yield a producer that is closed again by its context manager."""
        producer_context = Producer(
            producer_name='producer_1',
            team_name=team_name,
            expected_frequency_seconds=ExpectedFrequency.constantly,
            use_work_pool=False
        )
        with producer_context as producer:
            yield producer

    def test_publish(self, benchmark, dp_producer):

        def build_publish_args():
            # pedantic() calls this before each round and uses the returned
            # (args, kwargs) pair to invoke the benchmarked function.
            message = MessageFactory.create_message_with_payload_data()
            return [message], {}

        # Publishing a message takes 1ms on average.
        # Messages are flushed every 100ms.
        # config::kafka_producer_flush_time_limit_seconds
        #
        # Perform 2000 rounds to ensure 20 flushes.
        benchmark.pedantic(
            dp_producer.publish,
            setup=build_publish_args,
            rounds=2000
        )
65 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/list_command/base_command.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline.tools.introspector.base_command import IntrospectorCommand
20 | 
21 | 
class _BaseListCommand(IntrospectorCommand):
    """Common argument handling for the introspector list commands.

    This class reads `list_type` and `fields` from the concrete command
    class; they are not defined here.
    """

    @classmethod
    def add_base_arguments(cls, parser):
        """Add the base arguments plus the sorting options to `parser`."""
        super(_BaseListCommand, cls).add_base_arguments(parser)

        sort_by_help = (
            "Sort the listing by a particular field of the object "
            "in ascending order (by default)"
        )
        parser.add_argument(
            "-s",
            "--sort-by",
            type=str,
            default=None,
            help=sort_by_help
        )

        descending_help = (
            "Use --sort-by with descending order "
            "(Will be ignored if --sort-by is not set)"
        )
        parser.add_argument(
            "--descending-order", "--desc",
            action="store_true",
            default=False,
            help=descending_help
        )

    @classmethod
    def get_description(cls):
        """Return the parser description naming the list type and its fields."""
        template = "List {}, as a JSON array of formatted {}. Fields: {}"
        return template.format(cls.list_type, cls.list_type, cls.fields)

    def process_args(self, args, parser):
        """Store the sorting options, rejecting an unknown sort field.

        Args:
            args: parsed arguments providing `sort_by` and `descending_order`.
            parser: parser whose `error` method reports an invalid sort field.
        """
        super(_BaseListCommand, self).process_args(args, parser)
        sort_field = args.sort_by
        self.sort_by = sort_field
        self.descending_order = args.descending_order

        # No sort requested, or a known field: nothing to validate.
        if not sort_field or sort_field in self.fields:
            return

        message = (
            "You can not sort_by by {} for list type {}. "
            "Possible fields are: {}"
        ).format(sort_field, self.list_type, self.fields)
        raise parser.error(message)
60 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/list_command/topics.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import simplejson
20 | 
21 | from data_pipeline.tools.introspector.list_command.base_command import _BaseListCommand
22 | 
23 | 
class TopicsListCommand(_BaseListCommand):
    """List command for topics; prints the listing serialized by simplejson."""

    list_type = 'topics'
    # Field names accepted by --sort-by and shown in the command description.
    fields = [
        'name', 'topic_id', 'contains_pii',
        'primary_keys',
        'in_kafka', 'message_count',
        'source_name', 'source_id',
        'namespace',
        'created_at', 'updated_at'
    ]

    @classmethod
    def add_parser(cls, subparsers):
        """Register the "topics" sub-parser and bind its command callback."""
        list_command_parser = subparsers.add_parser(
            "topics",
            description=cls.get_description(),
            add_help=False
        )

        cls.add_base_arguments(list_command_parser)
        cls.add_source_and_namespace_arguments(list_command_parser)

        list_command_parser.set_defaults(
            command=lambda args:
                cls("data_pipeline_introspector_list").run(args, list_command_parser)
        )

    def process_args(self, args, parser):
        """Process the base/sorting arguments, then source and namespace."""
        super(TopicsListCommand, self).process_args(args, parser)
        self.process_source_and_namespace_args(args, parser)

    def run(self, args, parser):
        """Process `args`, then print the result of `list_topics` as JSON text."""
        self.process_args(args, parser)
        topics = self.list_topics(
            source_id=self.source_id,
            namespace_name=self.namespace,
            source_name=self.source_name,
            sort_by=self.sort_by,
            descending_order=self.descending_order
        )
        # A parenthesized single-argument print produces the same output as
        # the Python 2 print statement and also parses on Python 3.
        print(simplejson.dumps(topics))
65 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # This file is used to build the data-pipeline-tools debian package.  Because
 2 | # of this, it installs the tools extras dependencies.  All versions should be
 3 | # pinned here, because these packages will be included in a virtualenv
 4 | # deployed inside a debian package - so the build must be repeatable.
 5 | # This list was originally constructed by running
 6 | # `pip install -i https://pypi.yelpcorp.com/simple/ ".[tools]"`
 7 | # and `pip freeze > requirements.txt` in a virtualenv.
 8 | #
 9 | # Specifically, run:
10 | #
11 | # virtualenv tools
12 | # source tools/bin/activate
13 | # pip install --upgrade pip
14 | # pip install -b /nail/tmp -i https://pypi.yelpcorp.com/simple/ ".[tools]"
15 | # pip freeze > requirements.tmp
16 | # deactivate
17 | # rm -rf tools
18 | #
19 | # Note that the data_pipeline package should not appear here, and that cffi
20 | # should be at least 1.2.1 for trusty compatibility.
21 | 
22 | argparse==1.4.0
23 | boto==2.38.0
24 | bravado==8.3.0
25 | bravado-core==4.5.0
26 | bravado-decorators==0.6.0
27 | cached-property==1.2.0
28 | cffi==1.6.0
29 | characteristic==14.1.0
30 | contextdecorator==0.10.0
31 | crochet==1.4.0
32 | cryptography==1.3.4
33 | dateglob==0.1
34 | enum34==1.0.4
35 | fido==4.0.1
36 | functools32==3.2.3-2
37 | future==0.14.3
38 | futures==3.0.3
39 | geogrid==1.0.9
40 | ipaddress==1.0.14
41 | iso8601==0.1.10
42 | jsonschema==2.5.1
43 | kafka-python==0.9.5.post6
44 | kafka-utils==0.4.2
45 | kazoo==2.2
46 | mysqlclient==1.3.6
47 | ply==3.4
48 | psutil==4.2.0
49 | pycparser==2.14
50 | pycrypto==2.6.1
51 | pyOpenSSL==0.14.0
52 | pysensu_yelp==0.2.3
53 | PyStaticConfiguration==0.9.0
54 | pysubnettree==0.23
55 | python-dateutil==2.4.2
56 | pytz==2014.10
57 | PyYAML==3.11
58 | replication-delay-client==1.1.1
59 | repoze.lru==0.6
60 | requests==2.8.1
61 | retrying==1.3.3
62 | send-nsca==0.1.4.1
63 | setproctitle==1.1.8
64 | simplejson==3.6.5
65 | six==1.9.0
66 | SQLAlchemy==0.9.8
67 | subprocess32==3.2.6
68 | swagger-spec-validator==2.0.2
69 | thriftpy==0.1.15
70 | tornado==2.4.1
71 | Twisted==15.4.0
72 | data-pipeline-avro-util==0.2.1
73 | yelp-avro==1.9.2
74 | yelp-batch==0.19.8
75 | yelp-bytes==0.2.0
76 | yelp-cgeom==1.3.1
77 | yelp-clog==2.5.2
78 | yelp-conn==7.1.3
79 | yelp-encodings==0.1.3
80 | yelp-kafka==5.1.1
81 | yelp-lib==11.0.2
82 | yelp-logging==1.4.1
83 | yelp-meteorite==0.2.4
84 | yelp-servlib==4.7.0
85 | yelp-uri==1.1.0
86 | zope.interface==4.1.2
87 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/heartbeat_periodic_processor.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from datetime import datetime
20 | from datetime import timedelta
21 | 
22 | from dateutil.tz import tzutc
23 | 
24 | 
class BasePeriodicProcessor(object):
    """ Interface for handling periodic events that can be triggered by a
    heartbeat event, like sensu alert and data event checkpoint.  It is a
    base class: subclasses must implement `process`.

    Args:
      interval_in_seconds(int): the time interval between two events.
    """

    def __init__(self, interval_in_seconds):
        self.interval_in_seconds = interval_in_seconds
        # Due immediately: the first periodic_process call will process.
        self._next_process_time = self._utc_now

    def periodic_process(self, timestamp=None):
        """ This method remains because it's called by the replication handler;
        if / when we start calling the process method below directly from the
        replication handler we can remove it (DATAPIPE-1435)

        Calls `process` only when the next process time has been reached, and
        then schedules the following one `interval_in_seconds` later.

        Args:
            timestamp(datetime.datetime): the datetime of the event with utc
        """
        if not self._should_process():
            return
        self.process(timestamp)
        self._next_process_time = self._compute_next_process_time()

    def process(self, timestamp=None):
        """Handle one event; must be overridden by subclasses."""
        raise NotImplementedError

    def _should_process(self):
        """Return True once the current time has reached the next process time."""
        current_time = self._utc_now
        return current_time >= self._next_process_time

    def _compute_next_process_time(self):
        """Return the current time advanced by the configured interval."""
        current_time = self._utc_now
        return current_time + timedelta(seconds=self.interval_in_seconds)

    @property
    def _utc_now(self):
        """Current timezone-aware datetime in UTC."""
        utc = tzutc()
        return datetime.now(utc)
61 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/list_command/namespaces.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import simplejson
20 | 
21 | from data_pipeline.tools.introspector.list_command.base_command import _BaseListCommand
22 | 
23 | 
class NamespacesListCommand(_BaseListCommand):
    """List command for namespaces; prints the listing serialized by simplejson."""

    list_type = 'namespaces'
    # Field names accepted by --sort-by and shown in the command description.
    fields = [
        'name', 'namespace_id',
        'active_topic_count', 'active_source_count'
    ]

    @classmethod
    def add_parser(cls, subparsers):
        """Register the "namespaces" sub-parser and bind its command callback."""
        list_command_parser = subparsers.add_parser(
            "namespaces",
            description=cls.get_description(),
            add_help=False
        )

        list_command_parser.add_argument(
            '--active-namespaces',
            default=False,
            action='store_true',
            help=(
                'If set, this command will also return information about active '
                'sources and topics within each namespace. '
                'This is a time expensive operation.'
            )
        )

        cls.add_base_arguments(list_command_parser)

        list_command_parser.set_defaults(
            command=lambda args:
                cls("data_pipeline_introspector_list_namespaces").run(
                    args,
                    list_command_parser
                )
        )

    def run(self, args, parser):
        """Process `args`, then print the result of `list_namespaces` as JSON text."""
        self.process_args(args, parser)
        namespaces = self.list_namespaces(
            sort_by=self.sort_by,
            descending_order=self.descending_order,
            active_namespaces=args.active_namespaces
        )
        # A parenthesized single-argument print produces the same output as
        # the Python 2 print statement and also parses on Python 3.
        print(simplejson.dumps(namespaces))
68 | 


--------------------------------------------------------------------------------
/tests/benchmarks/logging_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import mock
20 | import pytest
21 | 
22 | from data_pipeline.config import get_config
23 | from data_pipeline.helpers.log import debug_log
24 | from tests.factories.base_factory import MessageFactory
25 | 
26 | 
@pytest.mark.usefixtures(
    "config_benchmark_containers_connections"
)
@pytest.mark.benchmark
class TestBenchLogging(object):
    """Benchmarks of `debug_log` against a direct logger call, with
    `repr(message)` and an empty function benchmarked for reference.
    """

    @pytest.fixture(
        params=[True, False],
        ids=['logger enabled', 'logger disabled']
    )
    def logger_enabled(self, request):
        """Parametrized flag: run once with the logger on, once with it off."""
        return request.param

    @pytest.yield_fixture
    def patch_logger_enabled(self, logger_enabled):
        """Force `Logger.isEnabledFor` to return the parametrized flag."""
        target = 'data_pipeline.config.logging.Logger.isEnabledFor'
        with mock.patch(target, return_value=logger_enabled):
            yield

    @pytest.fixture
    def message(self):
        return MessageFactory.create_message_with_payload_data()

    def test_debug_log(self, message, benchmark, patch_logger_enabled):
        def log_command():
            # The log text is handed over as a callable, not a built string.
            debug_log(lambda: "Message buffered: {}".format(repr(message)))

        benchmark(log_command)

    def test_logger(self, benchmark, message, patch_logger_enabled):
        def log_command():
            # Here the log text is formatted before the logger is called.
            get_config().logger.debug("Message buffered: {}".format(repr(message)))

        benchmark(log_command)

    def test_repr_message(self, benchmark, message):
        def log_command():
            repr(message)

        benchmark(log_command)

    def test_pass(self, benchmark, message):
        def log_command():
            pass

        benchmark(log_command)
72 | 


--------------------------------------------------------------------------------
/data_pipeline/message_type.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from enum import Enum
20 | 
21 | 
class _EnumRepr(Enum):
    """Enum base class whose repr has the form ``ClassName.member(value)``."""

    def __repr__(self):
        details = (type(self).__name__, self.name, self.value)
        return '{}.{}({})'.format(*details)
29 | 
30 | 
class MessageType(_EnumRepr):
    """Messages should be published primarily using the create, update, and
    delete types.  Refresh messages should only be produced if you know what
    you're doing, if in doubt, ask please.

    Attributes:
      log: NOTE(review): this member has no description here; presumably it
        marks log-style messages -- confirm with the owners.
      create: when new data is created, the payload contains the contents of
        the new row
      update: when data is updated, payload contains the new content and
        previous_payload contains the old content of the row
      delete: when rows are removed, the payload contains the content of the
        row before removal
      refresh: refresh messages are used to initially populate a topic, they
        do not correspond to any particular data change
    """
    log = 0
    create = 1
    update = 2
    delete = 3
    refresh = 4
51 | 
52 | 
class _ProtectedMessageType(_EnumRepr):
    """Protected message types should generally be avoided.  The clientlib
    won't expose these messages to users, they're used internally only.

    Attributes:
      heartbeat: emitted periodically on low volume topics so auditing
        processes can differentiate between slow or stalled topics and
        topics without messages.
      monitor: monitor messages are used to count the number of messages
        produced/consumed by client in a given time frame
      registration: NOTE(review): this member has no description here;
        presumably related to client registration -- confirm with the owners.
    """
    heartbeat = 5
    monitor = 6
    registration = 7
67 | 


--------------------------------------------------------------------------------
/tests/tools/sensu_ttl_alerter_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import mock
20 | import pysensu_yelp
21 | import pytest
22 | 
23 | from data_pipeline.tools.sensu_ttl_alerter import SensuTTLAlerter
24 | 
25 | 
class TestSensuTTLAlerter(object):
    """Tests for when `SensuTTLAlerter` calls `pysensu_yelp.send_event`."""

    @pytest.fixture
    def sensu_ttl_alerter(self):
        """An enabled alerter built from a fixed test event."""
        test_dict = {
            "name": "datapipeline_ttl_alerter_test",
            "output": "this is only a test of the datapipeline test alerter",
            "irc_channels": "#bam",
            "check_every": 60,
            "ttl": "300s",
            "runbook": "y/datapipeline",
            "status": 0,
            "team": "bam"
        }
        return SensuTTLAlerter(test_dict, enable=True)

    @pytest.yield_fixture
    def mocked_send_event(self, sensu_ttl_alerter):
        """Replace `pysensu_yelp.send_event` with an autospec'd mock."""
        with mock.patch.object(
            pysensu_yelp,
            'send_event',
            autospec=True
        ) as mocked_send_event:
            yield mocked_send_event

    def test_send_event_while_enabled(self, sensu_ttl_alerter, mocked_send_event):
        sensu_ttl_alerter.process()
        assert mocked_send_event.call_count == 1

    def test_toggling_enable_to_false(self, sensu_ttl_alerter, mocked_send_event):
        sensu_ttl_alerter.enable = False
        assert mocked_send_event.call_count == 1
        # call_args is an (args, kwargs) pair, so `'ttl' not in call_args`
        # compares the string with those two containers and is always true.
        # Check the keyword arguments instead.
        # NOTE(review): assumes send_event receives the event fields as
        # keyword arguments -- confirm against SensuTTLAlerter.
        _, send_event_kwargs = mocked_send_event.call_args
        assert 'ttl' not in send_event_kwargs

    def test_no_send_event_while_disabled(self, sensu_ttl_alerter, mocked_send_event):
        # there's one call when we toggle from True to False
        sensu_ttl_alerter.enable = False
        assert mocked_send_event.call_count == 1
        # there should be no further calls
        sensu_ttl_alerter.process()
        assert mocked_send_event.call_count == 1
67 | 


--------------------------------------------------------------------------------
/tests/tools/meteorite_gauge_manager_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from datetime import datetime
20 | from datetime import timedelta
21 | 
22 | import mock
23 | from dateutil.tz import tzutc
24 | 
25 | from data_pipeline.tools.meteorite_gauge_manager import MeteoriteGaugeManager
26 | 
27 | 
class TestMeteoriteGaugeManager(object):
    """Tests that `MeteoriteGaugeManager.process` sets the gauge as expected."""

    @mock.patch('yelp_meteorite.metrics.Gauge.set', autospec=True)
    def test_gauge_manager_call_count(self, mock_set):
        manager = MeteoriteGaugeManager(
            interval_in_seconds=10,
            stats_gauge_name='test_gauge'
        )
        event_time = datetime.now(tzutc())
        manager.process(event_time)
        assert mock_set.call_count == 1

    @mock.patch('yelp_meteorite.metrics.Gauge.set', autospec=True)
    def test_gauge_manager_process_args(self, mock_set):
        utc_now_patch = mock.patch(
            'data_pipeline.tools.meteorite_gauge_manager.MeteoriteGaugeManager._utc_now',
            new_callable=mock.PropertyMock
        )
        with utc_now_patch as utc_now:
            # Freeze "now" so the reported delay is exactly the offset below.
            fake_time = datetime(year=2016, month=1, day=1)
            utc_now.return_value = fake_time
            manager = MeteoriteGaugeManager(
                interval_in_seconds=10,
                stats_gauge_name='test_gauge'
            )
            event_time = fake_time - timedelta(seconds=60)
            manager.process(event_time)
            # With autospec the first positional argument is the gauge itself.
            positional_args = mock_set.call_args[0]
            assert positional_args[1] == 60.0

    @mock.patch('yelp_meteorite.metrics.Gauge.set', autospec=True)
    def test_gauge_manager_disabled(self, mock_set):
        manager = MeteoriteGaugeManager(
            interval_in_seconds=10,
            stats_gauge_name='test_gauge',
            disable=True
        )
        event_time = datetime.now(tzutc())
        manager.process(event_time)
        assert mock_set.call_count == 0
66 | 


--------------------------------------------------------------------------------
/data_pipeline/_kafka_util.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from kafka_utils.util.offsets import get_topics_watermarks
20 | 
21 | 
def get_actual_published_messages_count(
    kafka_client,
    topics,
    topic_tracked_offset_map,
    raise_on_error=True,
):
    """Get the actual number of published messages of specified topics.

    Args:
        kafka_client (kafka.client.KafkaClient): kafka client
        topics ([str]): List of topic names to get message count
        topic_tracked_offset_map (dict(str, int)): dictionary which
            contains each topic and its current stored offset value.
            Topics missing from this map are treated as having offset 0.
        raise_on_error (Optional[bool]): if False,  the function ignores
            missing topics and missing partitions. It still may fail on
            the request send.  Default to True.

    Returns:
        dict(str, int): Each topic and its actual published messages count
            since last offset.  If a topic or partition is missing when
            `raise_on_error` is False, the returned dict will not contain
            the missing topic.

    Raises:
        :class:`~yelp_kafka.error.UnknownTopic`: upon missing topics and
            raise_on_error=True
        :class:`~yelp_kafka.error.UnknownPartition`: upon missing partitions
            and raise_on_error=True
        FailedPayloadsError: upon send request error.
    """
    topic_watermarks = get_topics_watermarks(
        kafka_client,
        topics,
        raise_on_error=raise_on_error
    )

    # `items()` replaces the Python 2-only `iteritems()` so this also runs on
    # Python 3; the resulting mapping is the same.
    # NOTE(review): only entry 0 of each topic's watermarks is consulted;
    # presumably topics are expected to have a single partition -- confirm.
    return {
        topic: (
            partition_offsets[0].highmark -
            topic_tracked_offset_map.get(topic, 0)
        )
        for topic, partition_offsets in topic_watermarks.items()
    }
65 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/schema_check_command.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline.tools.introspector.base_command import IntrospectorCommand
20 | 
21 | 
class SchemaCheckCommand(IntrospectorCommand):
    """Command that asks the schematizer whether an avro schema is compatible
    with a given source and namespace, and prints the answer.
    """

    @classmethod
    def add_parser(cls, subparsers):
        """Register the "schema-check" sub-parser and bind its command callback."""
        schema_check_command_parser = subparsers.add_parser(
            "schema-check",
            description="Checks the compatibility of an avro schema and all"
                        " given avro_schemas within the given namespace"
                        " and source. Compatibility means that the schema can"
                        " deserialize data serialized by existing schemas within"
                        " all topics and vice-versa.",
            add_help=False
        )

        cls.add_base_arguments(schema_check_command_parser)
        cls.add_source_and_namespace_arguments(schema_check_command_parser)

        schema_check_command_parser.add_argument(
            "schema",
            type=str,
            help="The avro schema to check."
        )

        # NOTE(review): "instropsector" below looks like a typo of
        # "introspector"; it is a runtime string, so it is left untouched.
        schema_check_command_parser.set_defaults(
            command=lambda args: cls("data_pipeline_instropsector_schema_check").run(
                args, schema_check_command_parser
            )
        )

    def process_args(self, args, parser):
        """Process base, source and namespace arguments and store the schema."""
        super(SchemaCheckCommand, self).process_args(args, parser)
        self.process_source_and_namespace_args(args, parser)
        self.schema = args.schema

    def is_compatible(self):
        """Return the schematizer's compatibility verdict for the stored schema."""
        return self.schematizer.is_avro_schema_compatible(
            avro_schema_str=self.schema,
            source_name=self.source_name,
            namespace_name=self.namespace
        )

    def run(self, args, parser):
        """Process `args`, then print a dict holding the compatibility result."""
        self.process_args(args, parser)
        # A parenthesized single-argument print produces the same output as
        # the Python 2 print statement and also parses on Python 3.
        print({"is_compatible": self.is_compatible()})
66 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | # Removed for now:
 3 | # py34, py35, pypy
 4 | envlist = py27, docs
 5 | skipsdist = true
 6 | indexserver =
 7 |     default = https://pypi.yelpcorp.com/simple/
 8 | 
 9 | [testenv]
10 | basepython = python2.7
11 | envdir = venv/py27
12 | setenv =
13 |     PIP_INDEX_URL = https://pypi.yelpcorp.com/simple
14 | venv_update = {toxinidir}/bin/venv-update venv= {envdir} install= 
15 | commands =
16 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/dev-internal.txt
17 |     py.test -c tox.ini --cov=data_pipeline --maxfail=3 --benchmark-skip tests/
18 |     pre-commit run --all-files
19 | 
20 | [testenv:pre-commit]
21 | envdir = venv/pre_commit
22 | commands = 
23 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/pre_commit.txt
24 | 	pre-commit {posargs}
25 | 
26 | [testenv:guard]
27 | envlist = py27
28 | envdir = venv/py27
29 | commands =
30 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/dev-internal.txt
31 |     py.test -c tox.ini --doctest-modules -m "not pending" {posargs}
32 | 
33 | [testenv:docs]
34 | envdir = venv/py27
35 | commands =
36 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/dev-internal.txt
37 |     sphinx-apidoc -f -e -o docs/code data_pipeline
38 |     sphinx-build -b html -d docs/build/doctrees docs/ docs/build/html
39 | 
40 | [testenv:devenv]
41 | envdir = venv/py27
42 | commands =
43 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/dev-internal.txt
44 | 
45 | [testenv:devenv-command]
46 | envdir = venv/py27
47 | commands = 
48 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/dev-internal.txt
49 | 	{posargs}
50 | 
51 | [testenv:benchmark]
52 | envdir = venv/py27
53 | commands = 	
54 | 	{[testenv]venv_update} -r {toxinidir}/requirements.d/dev-internal.txt
55 | 	docker-compose --file=docker-compose.yml --file=data_pipeline/testing_helpers/docker-compose.yml kill pypy
56 | 	docker-compose --file=docker-compose.yml --file=data_pipeline/testing_helpers/docker-compose.yml rm -v --force pypy
57 | 	docker-compose --file=docker-compose.yml --file=data_pipeline/testing_helpers/docker-compose.yml build pypy
58 | 	docker-compose --file=docker-compose.yml --file=data_pipeline/testing_helpers/docker-compose.yml run pypy /dp_reqs/venv/bin/py.test -m "benchmark" --benchmark-verbose {posargs}
59 | 
60 | [flake8]
61 | ignore =
62 | exclude = .git,.tox,docs,virtualenv_run,venv,__pycache__,.ropeproject,debian,dist
63 | filename = *.py,*.wsgi
64 | max-line-length = 131
65 | 
66 | [pytest]
67 | addopts = -m"not benchmark" -m"not skip" --ignore=setup.py --doctest-glob=*.rst -vv
68 | 
69 | [pep8]
70 | # E265 deals with spacing inside of comments - breaks human formatting
71 | # E309 puts a blank line after class declarations - doesn't work well with docstrings
72 | # E501 reformats lines to fit in --max-line-length poorly
73 | ignore = E265,E309,E501
74 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/list_command/sources.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import simplejson
20 | 
21 | from data_pipeline.tools.introspector.list_command.base_command import _BaseListCommand
22 | 
23 | 
class SourcesListCommand(_BaseListCommand):
    """Introspector ``list sources`` sub-command.

    Prints the listed sources as a JSON document.
    """

    list_type = 'sources'
    fields = [
        'name', 'source_id', 'owner_email',
        'namespace', 'active_topic_count'
    ]

    @classmethod
    def add_parser(cls, subparsers):
        """Register the ``sources`` sub-parser and its arguments."""
        parser = subparsers.add_parser(
            "sources",
            description=cls.get_description(),
            add_help=False
        )

        active_sources_help = (
            'If set, this command will also return information about active '
            'topics within each source. This is a time expensive operation.'
        )
        parser.add_argument(
            '--active-sources',
            default=False,
            action='store_true',
            help=active_sources_help
        )

        cls.add_base_arguments(parser)

        namespace_help = (
            "Namespace name that contains a source of source name given. "
            "If --source-id is given, then this will be ignored."
        )
        parser.add_argument(
            "--namespace",
            type=str,
            default=None,
            help=namespace_help
        )

        def run_command(args):
            return cls("data_pipeline_introspector_list").run(args, parser)

        parser.set_defaults(command=run_command)

    def process_args(self, args, parser):
        """Run the base argument processing and remember the namespace."""
        super(SourcesListCommand, self).process_args(args, parser)
        self.namespace = args.namespace

    def run(self, args, parser):
        """Process the arguments, list the sources and print them as JSON."""
        self.process_args(args, parser)
        sources = self.list_sources(
            namespace_name=self.namespace,
            sort_by=self.sort_by,
            descending_order=self.descending_order,
            active_sources=args.active_sources
        )
        print(simplejson.dumps(sources))
77 | 


--------------------------------------------------------------------------------
/tests/_fast_uuid_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import mock
20 | import pytest
21 | 
22 | import data_pipeline._fast_uuid
23 | from data_pipeline._fast_uuid import _DefaultUUID
24 | from data_pipeline._fast_uuid import _LibUUID
25 | from data_pipeline._fast_uuid import FastUUID
26 | 
27 | 
class TestFastUUID(object):
    """Tests for FastUUID, run both with and without libuuid available."""

    @pytest.fixture(params=[True, False])
    def libuuid_available(self, request):
        """Parametrize every test over libuuid being available or not."""
        return request.param

    @pytest.yield_fixture
    def fast_uuid(self, libuuid_available):
        """Yield a FastUUID instance.

        When libuuid should be unavailable, ``FFI`` is patched to raise and
        the cached ``_LibUUID._ffi`` is cleared for the duration of the test.
        """
        if libuuid_available:
            yield FastUUID()
        else:
            with mock.patch.object(
                data_pipeline._fast_uuid,
                'FFI',
                side_effect=Exception
            ):
                # Save and restore the existing state; this will allow already
                # instantiated FastUUID instances to keep working.
                original_ffi = data_pipeline._fast_uuid._LibUUID._ffi
                data_pipeline._fast_uuid._LibUUID._ffi = None
                try:
                    yield FastUUID()
                finally:
                    data_pipeline._fast_uuid._LibUUID._ffi = original_ffi

    def test_uuid1(self, fast_uuid):
        assert self._is_valid_uuid(fast_uuid.uuid1())

    def test_uuid1_does_not_repeat(self, fast_uuid):
        assert fast_uuid.uuid1() != fast_uuid.uuid1()

    def test_uuid4(self, fast_uuid):
        # Validate uuid4 itself; this previously re-checked uuid1 by mistake,
        # leaving the shape of uuid4's output untested.
        assert self._is_valid_uuid(fast_uuid.uuid4())

    def test_uuid4_does_not_repeat(self, fast_uuid):
        assert fast_uuid.uuid4() != fast_uuid.uuid4()

    def _is_valid_uuid(self, uuid_val):
        """Return True if ``uuid_val`` is a ``str`` of length 16."""
        return isinstance(uuid_val, str) and len(uuid_val) == 16

    def test_use_libuuid_when_available(self, fast_uuid, libuuid_available):
        # Generate once before inspecting which implementation is in use.
        fast_uuid.uuid1()
        if libuuid_available:
            assert isinstance(fast_uuid._uuid_in_use, _LibUUID)
        else:
            assert isinstance(fast_uuid._uuid_in_use, _DefaultUUID)
74 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | CURRENT_VERSION=$(strip $(shell sed -n -r "s/__version__ = '(.+)'/\1/p" $(CURDIR)/data_pipeline/__init__.py))
 2 | NEXT_VERSION=$(shell echo $(CURRENT_VERSION) | awk -F. '/[0-9]+\./{$$NF+=1;OFS=".";print}')
 3 | 
 4 | REBUILD_FLAG =
 5 | 
 6 | .PHONY: help all production clean clean-pyc clean-build clean-docs clean-vim lint test docs coverage install-hooks release prepare-release compose-prefix
 7 | 
 8 | help:
 9 | 	@echo "clean-build - remove build artifacts"
10 | 	@echo "clean-pyc - remove Python file artifacts"
11 | 	@echo "clean-docs - remove doc creation artifacts"
12 | 	@echo "clean-vim - remove vim swap file artifacts"
13 | 	@echo "test - run tests quickly with the default Python"
14 | 	@echo "coverage - check code coverage"
15 | 	@echo "docs - generates Sphinx HTML documentation, including API docs"
16 | 	@echo "compose-prefix - generates a preconfigured docker-compose command"
17 | 	@echo "prepare-release - Bump the version number and add a changelog entry (pushmasters only)"
18 | 	@echo "release - Commit the latest version, tag the commit, and push it (pushmasters only)"
19 | 
20 | all: production install-hooks
21 | 
22 | production:
23 | 	@true
24 | 
25 | clean: clean-build clean-pyc clean-docs
26 | 
27 | clean-build:
28 | 	rm -fr build/
29 | 	rm -fr dist/
30 | 	rm -fr *.egg-info
31 | 
32 | clean-pyc:
33 | 	find . -name '*.pyc' -exec rm -f {} +
34 | 	find . -name '*.pyo' -exec rm -f {} +
35 | 	find . -name '*~' -exec rm -f {} +
36 | 
37 | clean-docs:
38 | 	rm -rf docs/build/*
39 | 	rm -rf docs/code/*
40 | 
41 | clean-vim:
42 | 	find . -name '*.swp' -exec rm -f {} +
43 | 	find . -name '*.swo' -exec rm -f {} +
44 | 
45 | test:
46 | 	# This will time out after 30 minutes (1800 seconds), in case there is a hang on jenkins
47 | 	PULL_CONTAINERS=true FORCE_FRESH_CONTAINERS=true timeout -9 1800 tox -c tox.ini $(REBUILD_FLAG)
48 | 
49 | docs: clean-docs 
50 | 	tox -c tox.ini -e docs $(REBUILD_FLAG)
51 | 
52 | coverage: test
53 | 
54 | install-hooks:
55 | 	tox -c tox.ini -e pre-commit -- install -f --install-hooks
56 | 
57 | # See the makefile in yelp_package/Makefile for packaging stuff
58 | itest_%:
59 | 	make -C yelp_package $@
60 | 
61 | # Steps to release (Don't do this if you are not a pushmaster - see "Pushing Code"
62 | # on y/datapipeline)
63 | # 1. `make prepare-release`
64 | # 2. `make release`
65 | LAST_COMMIT_MSG = $(shell git log -1 --pretty=%B )
66 | prepare-release:
67 | 	dch -v $(NEXT_VERSION) --changelog debian/changelog "Commit: $(LAST_COMMIT_MSG)"
68 | 	sed -i -r "s/__version__ = '(.+)'/__version__ = '$(NEXT_VERSION)'/" data_pipeline/__init__.py
69 | 	@git diff
70 | 
71 | release:
72 | 	git commit -a -m "Released $(CURRENT_VERSION) via make release"
73 | 	git tag v$(CURRENT_VERSION)
74 | 	git push --tags origin master && git push origin master
75 | 
76 | compose-prefix:
77 | 	@python -c "from data_pipeline.testing_helpers.containers import Containers; print Containers.compose_prefix()"
78 | 


--------------------------------------------------------------------------------
/tests/tools/sensu_alert_manager_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from datetime import datetime
20 | from datetime import timedelta
21 | 
22 | import mock
23 | import pytest
24 | from dateutil.tz import tzutc
25 | 
26 | from data_pipeline.tools.sensu_alert_manager import SensuAlertManager
27 | 
28 | 
class TestSensuAlertManager(object):
    """Tests for the events SensuAlertManager passes to log_and_send_event."""

    @pytest.fixture
    def sensu_alert_manager(self):
        """An enabled alert manager for the service ``test_service``."""
        return SensuAlertManager(
            interval_in_seconds=30,
            service_name="test_service",
            result_dict={"check_every": 60, "ttl": "300s"},
            max_delay_seconds=120,
            disable=False
        )

    @pytest.yield_fixture
    def mocked_log_and_send(self, sensu_alert_manager):
        """Patch ``log_and_send_event`` on the manager and yield the mock."""
        patcher = mock.patch.object(
            sensu_alert_manager,
            'log_and_send_event',
            autospec=True
        )
        with patcher as patched_log_and_send:
            yield patched_log_and_send

    def test_process_no_timestamp(self, sensu_alert_manager, mocked_log_and_send):
        sensu_alert_manager.process()
        assert mocked_log_and_send.call_count == 0

    def test_process_with_recent_timestamp(self, sensu_alert_manager, mocked_log_and_send):
        sensu_alert_manager.process(datetime.now(tzutc()))
        assert mocked_log_and_send.call_count == 1
        # First positional argument of the call is the event dict.
        sent_event = mocked_log_and_send.call_args[0][0]
        assert sent_event['output'] == "test_service has caught up to real time"

    def test_process_with_old_timestamp(self, sensu_alert_manager, mocked_log_and_send):
        one_day_ago = datetime.now(tzutc()) - timedelta(hours=24)
        sensu_alert_manager.process(one_day_ago)
        assert mocked_log_and_send.call_count == 1
        sent_event = mocked_log_and_send.call_args[0][0]
        assert "min behind real time" in sent_event['output']

    def test_toggling_disable_to_true(self, sensu_alert_manager, mocked_log_and_send):
        # Assigning ``disable`` alone is expected to send exactly one event.
        sensu_alert_manager.disable = True
        assert mocked_log_and_send.call_count == 1
        sent_event = mocked_log_and_send.call_args[0][0]
        assert sent_event['output'] == "disabling sensu alert for test_service"
72 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/info/topic.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import simplejson
20 | 
21 | from data_pipeline.tools.introspector.base_command import IntrospectorCommand
22 | from data_pipeline.tools.introspector.models import IntrospectorSchema
23 | from data_pipeline.tools.introspector.models import IntrospectorTopic
24 | 
25 | 
class TopicInfoCommand(IntrospectorCommand):
    """Introspector ``info topic`` sub-command.

    Prints a JSON description of one topic together with its schemas.
    """

    @classmethod
    def add_parser(cls, subparsers):
        """Register the ``topic`` sub-parser and its arguments."""
        parser = subparsers.add_parser(
            "topic",
            description="Get information on a specific data pipeline topic.",
            add_help=False
        )

        cls.add_base_arguments(parser)
        parser.add_argument(
            "topic_name",
            type=str,
            help="Name of topic to retrieve information on."
        )

        def run_command(args):
            command = cls("data_pipeline_instropsector_info_topic")
            return command.run(args, parser)

        parser.set_defaults(command=run_command)

    def info_topic(self, name):
        """Return an ordered dict describing the topic called ``name``.

        The topic's schemas are attached under the ``'schemas'`` key.
        """
        raw_topic = self.schematizer.get_topic_by_name(name)
        introspector_topic = IntrospectorTopic(
            raw_topic,
            kafka_topics=self._kafka_topics,
            topics_to_range_map=self._topics_with_messages_to_range_map
        )
        topic_info = introspector_topic.to_ordered_dict()
        topic_info['schemas'] = self.list_schemas(name)
        return topic_info

    def list_schemas(self, topic_name):
        """Return the topic's schemas as ordered dicts, newest first.

        Ordering is by each schema's ``'created_at'`` value, descending.
        """
        raw_schemas = self.schematizer.get_schemas_by_topic(topic_name)
        return sorted(
            [IntrospectorSchema(raw).to_ordered_dict() for raw in raw_schemas],
            key=lambda schema: schema['created_at'],
            reverse=True
        )

    def process_args(self, args, parser):
        """Run the base argument processing and remember the topic name."""
        super(TopicInfoCommand, self).process_args(args, parser)
        self.topic_name = args.topic_name

    def run(self, args, parser):
        """Process the arguments and print the topic info as JSON."""
        self.process_args(args, parser)
        topic_info = self.info_topic(self.topic_name)
        print(simplejson.dumps(topic_info))
78 | 


--------------------------------------------------------------------------------
/tests/benchmarks/message_test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import pytest
20 | 
21 | from data_pipeline.helpers.yelp_avro_store import _AvroStringStore
22 | from data_pipeline.message import CreateMessage
23 | from tests.factories.base_factory import SchemaFactory
24 | 
25 | 
@pytest.mark.usefixtures(
    "config_benchmark_containers_connections"
)
@pytest.mark.benchmark
class TestBenchMessage(object):
    """Benchmarks for creating, encoding and decoding messages.

    Each benchmark runs 1000 rounds; inputs are rebuilt by ``setup`` before
    every round.
    """

    @staticmethod
    def _schema_id_and_payload_data():
        """Return a ``(schema_id, payload_data)`` pair from SchemaFactory."""
        schema_id = SchemaFactory.get_schema_json().schema_id
        payload_data = SchemaFactory.get_payload_data()
        return schema_id, payload_data

    def test_create_message(self, benchmark):

        def setup():
            schema_id, payload_data = self._schema_id_and_payload_data()
            return [schema_id, payload_data], {}

        def create_message(schema_id, payload_data):
            CreateMessage(
                schema_id=schema_id,
                payload_data=payload_data
            )

        benchmark.pedantic(create_message, setup=setup, rounds=1000)

    def test_encode_message(self, benchmark):

        def setup():
            schema_id, payload_data = self._schema_id_and_payload_data()
            return [schema_id, payload_data], {}

        def encode_message(schema_id, payload_data):
            writer = _AvroStringStore().get_writer(schema_id)
            writer.encode(message_avro_representation=payload_data)

        benchmark.pedantic(encode_message, setup=setup, rounds=1000)

    def test_decode_message(self, benchmark):

        def setup():
            # Encode during setup so only decoding is measured.
            schema_id, payload_data = self._schema_id_and_payload_data()
            writer = _AvroStringStore().get_writer(schema_id)
            encoded_payload = writer.encode(
                message_avro_representation=payload_data
            )
            return [schema_id, encoded_payload], {}

        def decode_message(schema_id, payload):
            reader = _AvroStringStore().get_reader(
                reader_id_key=schema_id,
                writer_id_key=schema_id
            )
            reader.decode(encoded_message=payload)

        benchmark.pedantic(decode_message, setup=setup, rounds=1000)
81 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/models/data_target.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from collections import namedtuple
20 | 
21 | from data_pipeline.schematizer_clientlib.models.model_base import BaseModel
22 | 
23 | 
"""
Represent the data of a data target.  A data target represents a destination
where the data (messages) are eventually sent to, such as a Redshift cluster.

Args:
    data_target_id (int): The id of the data target.
    name: The name of the data target (presumably a str; confirm against the
        schematizer API).
    target_type (str): The target type, such as Redshift, etc.
    destination (str): The actual location of the data target, such as the Url
        of a Redshift cluster.
"""
DataTarget = namedtuple(
    'DataTarget',
    ['data_target_id', 'name', 'target_type', 'destination']
)
38 | 
39 | 
class _DataTarget(BaseModel):
    """Internal data-target model.

    Converts between a response object, a cache dict and the public
    ``DataTarget`` namedtuple returned by schematizer functions.
    """

    def __init__(self, data_target_id, name, target_type, destination):
        self.data_target_id = data_target_id
        self.name = name
        self.target_type = target_type
        self.destination = destination

    @classmethod
    def from_response(cls, response):
        """Build an instance from the same-named attributes of ``response``."""
        return cls(
            response.data_target_id,
            response.name,
            response.target_type,
            response.destination
        )

    def to_cache_value(self):
        """Return the four fields as a dict keyed by field name."""
        cache_value = {}
        cache_value['data_target_id'] = self.data_target_id
        cache_value['name'] = self.name
        cache_value['target_type'] = self.target_type
        cache_value['destination'] = self.destination
        return cache_value

    @classmethod
    def from_cache_value(cls, cache_value):
        """Build an instance from a dict produced by ``to_cache_value``."""
        return cls(
            cache_value['data_target_id'],
            cache_value['name'],
            cache_value['target_type'],
            cache_value['destination']
        )

    def to_result(self):
        """Return the public ``DataTarget`` namedtuple for this instance."""
        return DataTarget(
            self.data_target_id,
            self.name,
            self.target_type,
            self.destination
        )
84 | 


--------------------------------------------------------------------------------
/data_pipeline/schemas/envelope_v1.avsc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "type": "record",
 3 |   "namespace": "yelp.data_pipeline",
 4 |   "name": "envelope",
 5 |   "doc": "Wraps message with schema format for decoding and pipeline metadata.",
 6 |   "fields": [
 7 |     {
 8 |       "name": "uuid",
 9 |       "type": {
10 |         "name": "uuid",
11 |         "type": "fixed",
12 |         "size": 16
13 |       },
14 |       "doc": "Uniquely identifies the message."
15 |     },
16 |     {
17 |       "name": "message_type",
18 |       "type": {
19 |         "name": "message_type",
20 |         "type": "enum",
21 |         "doc": "Enumeration for possible types of message, these are used as values for data_pipeline.message_type.MessageType.",
22 |         "symbols": [
23 |           "create",
24 |           "update",
25 |           "delete",
26 |           "refresh",
27 |           "heartbeat",
28 |           "monitor",
29 |           "registration",
30 |           "log"
31 |         ]
32 |       },
33 |       "doc": "Identifies the type of message.  Refresh messages are used to bootstrap topics and can be safely ignored by consumers that only care about data updates.  Heartbeats will be emitted periodically on topics without other messages to facilitate auditing - they will not be passed to the consumer."
34 |     },
35 |     {
36 |       "name": "schema_id",
37 |       "type": "int",
38 |       "doc": "Schema identifier for the payload."
39 |     },
40 |     {
41 |       "name": "payload",
42 |       "type": "bytes",
43 |       "doc": "Avro-encoded data encoded with the schema corresponding to schema_id"
44 |     },
45 |     {
46 |       "name": "previous_payload",
47 |       "type": [
48 |         "null",
49 |         "bytes"
50 |       ],
51 |       "doc": "Avro-encoded state of the row prior to the update for update messages."
52 |     },
53 |     {
54 |       "name": "meta",
55 |       "type": [
56 |         "null",
57 |         {
58 |           "type": "array",
59 |           "items": {
60 |             "type": "record",
61 |             "name": "meta_envelope",
62 |             "doc": "Record to encode and pipeline each meta_attribute in data_pipeline message.",
63 |             "fields": [
64 |               {
65 |                 "name": "schema_id",
66 |                 "type": "int",
67 |                 "doc": "Schema identifier for the meta-attribute payload."
68 |               },
69 |               {
70 |                 "name": "payload",
71 |                 "type": "bytes",
72 |                 "doc": "Avro-encoded data encoded with the schema corresponding to schema_id"
73 |               }
74 |             ]
75 |           }
76 |         }
77 |       ],
78 |       "doc": "List of record containing schema_id and encoded payloads for meta-attributes."
79 |     },
80 |     {
81 |       "name": "encryption_type",
82 |       "type": [
83 |         "null",
84 |         "string"
85 |       ],
86 |       "doc": "If encryption is used, identifies the type and key used to encrypt the payload."
87 |     },
88 |     {
89 |       "name": "timestamp",
90 |       "type": "int",
91 |       "doc": "Time the message was produced."
92 |     }
93 |   ]
94 | }
95 | 


--------------------------------------------------------------------------------
/data_pipeline/testing_helpers/kafka_docker.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from contextlib import contextmanager
20 | 
21 | from kafka import KafkaClient
22 | from kafka import SimpleConsumer
23 | 
24 | from data_pipeline.config import get_config
25 | from data_pipeline.message import create_from_offset_and_message
26 | 
27 | 
# One megabyte (1024 * 1024); passed as ``max_buffer_size`` to the
# SimpleConsumer built in setup_capture_new_messages_consumer.
_ONE_MEGABYTE = 1024 * 1024
# Module logger, taken from the data pipeline config.
logger = get_config().logger
30 | 
31 | 
@contextmanager
def capture_new_data_pipeline_messages(topic):
    """Context manager yielding a getter for decoded messages on ``topic``.

    Capturing starts at the tail of the topic (see ``capture_new_messages``),
    so only messages published afterwards are seen.

    Returns:
        Callable[[int], List[Message]]: Function taking one optional
            argument, count (default 100), that fetches up to count kafka
            messages and returns them decoded as data pipeline messages.
            It may be called zero or more times.
    """
    with capture_new_messages(topic) as fetch_kafka_messages:
        def get_data_pipeline_messages(count=100):
            decoded_messages = []
            for raw_message in fetch_kafka_messages(count):
                decoded_messages.append(
                    create_from_offset_and_message(raw_message)
                )
            return decoded_messages

        yield get_data_pipeline_messages
53 | 
54 | 
@contextmanager
def capture_new_messages(topic):
    """Context manager yielding a getter for raw kafka messages on ``topic``.

    The underlying consumer is positioned at the tail of the topic, so the
    yielded function (optional ``count``, default 100) only returns messages
    from that point on.
    """
    with setup_capture_new_messages_consumer(topic) as tail_consumer:
        def get_messages(count=100):
            fetched = tail_consumer.get_messages(count=count)
            return fetched

        yield get_messages
65 | 
66 | 
@contextmanager
def setup_capture_new_messages_consumer(topic):
    """Context manager that yields a kafka consumer positioned at the tail
    of ``topic``.

    A KafkaClient is created from the configured broker list and is always
    closed when the context exits, including when the body of the ``with``
    statement (or the consumer setup itself) raises.

    Args:
        topic: Name of the kafka topic to consume from.

    Yields:
        SimpleConsumer: Consumer for ``topic`` that only sees messages
            published after the seek to the tail.
    """
    kafka = KafkaClient(get_config().cluster_config.broker_list)
    try:
        # str() keeps the group name a native string even though this module
        # uses unicode_literals.
        group = str('data_pipeline_clientlib_test')
        consumer = SimpleConsumer(
            kafka,
            group,
            topic,
            max_buffer_size=_ONE_MEGABYTE
        )
        consumer.seek(0, 2)  # seek to tail, 0 is the offset, and 2 is the tail

        yield consumer
    finally:
        # Release the client connection even if the caller's block failed.
        kafka.close()
80 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/register/mysql_command.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline.tools.introspector.register.base_command import _BaseRegisterCommand
20 | 
21 | 
class RegisterMysqlCommand(_BaseRegisterCommand):
    """Register sub-command that sends MySQL statements to the schematizer
    via ``register_schema_from_mysql_stmts`` and prints the resulting schema.
    """

    @classmethod
    def add_parser(cls, subparsers):
        """Add the ``mysql`` sub-parser, its arguments and its ``command``
        default to ``subparsers``.
        """
        mysql_parser = subparsers.add_parser(
            "mysql",
            description="Register the given mysql statements "
                        "as schemas to the schematizer.",
            add_help=False
        )

        cls.add_base_arguments(mysql_parser)

        mysql_parser.add_argument(
            "--create-table",
            type=str,
            required=True,
            help="The mysql statement of creating new table"
        )

        # The two remaining statements are optional and default to None.
        optional_statements = (
            ("--old-create-table",
             "The mysql statement of creating old table. "),
            ("--alter-table",
             "The mysql statement of altering table schema. "),
        )
        for flag, help_text in optional_statements:
            mysql_parser.add_argument(
                flag,
                type=str,
                default=None,
                help=help_text
            )

        def run_command(args):
            command = cls("data_pipeline_instropsector_register_mysql")
            return command.run(args, mysql_parser)

        mysql_parser.set_defaults(command=run_command)

    def process_args(self, args, parser):
        """Run the base class argument processing, then copy the three MySQL
        statement arguments onto the instance.
        """
        super(RegisterMysqlCommand, self).process_args(args, parser)
        self.create_table = args.create_table
        self.old_create_table = args.old_create_table
        self.alter_table = args.alter_table

    def run(self, args, parser):
        """Process the arguments, register the schema from the MySQL
        statements and print the schema that comes back.
        """
        self.process_args(args, parser)
        registered_schema = self.schematizer.register_schema_from_mysql_stmts(
            namespace=self.namespace,
            source=self.source_name,
            source_owner_email=self.source_owner_email,
            contains_pii=self.pii,
            new_create_table_stmt=self.create_table,
            old_create_table_stmt=self.old_create_table,
            alter_table_stmt=self.alter_table
        )
        self.print_schema(registered_schema)
79 | 


--------------------------------------------------------------------------------
/data_pipeline/helpers/yelp_avro_store.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline_avro_util.avro_string_reader import AvroStringReader
20 | from data_pipeline_avro_util.avro_string_writer import AvroStringWriter
21 | 
22 | from data_pipeline.helpers.singleton import Singleton
23 | from data_pipeline.schematizer_clientlib.schematizer import get_schematizer
24 | 
25 | 
class _AvroStringStore(object):
    """Singleton store that caches AvroStringWriter and AvroStringReader
    objects, particularly used by the message class to encode and decode
    messages respectively.

    This class was added for performance enhancements
    w store : pb/199453
    w/o store : pb/199448
    """
    __metaclass__ = Singleton

    def __init__(self):
        # Writers are keyed by id_key; readers by the pair
        # (reader_id_key, writer_id_key).
        self._writer_cache = {}
        self._reader_cache = {}

    @property
    def _schematizer(self):
        """Schematizer client obtained from ``get_schematizer()``."""
        return get_schematizer()

    def _get_avro_schema(self, schema_id):
        """Return the ``schema_json`` of the schema looked up by id."""
        schema = self._schematizer.get_schema_by_id(schema_id)
        return schema.schema_json

    def get_writer(self, id_key, avro_schema=None):
        """Return the cached AvroStringWriter for ``id_key``, building and
        caching one on a miss.

        Args:
            id_key: Cache key; also used as the schema id for the schematizer
                lookup when ``avro_schema`` is not given.
            avro_schema: Optional schema to build the writer from instead of
                looking it up.
        """
        writer = self._writer_cache.get(id_key)
        if not writer:
            schema = avro_schema or self._get_avro_schema(id_key)
            writer = AvroStringWriter(schema=schema)
            self._writer_cache[id_key] = writer
        return writer

    def get_reader(
        self,
        reader_id_key,
        writer_id_key,
        reader_avro_schema=None,
        writer_avro_schema=None
    ):
        """Return the cached AvroStringReader for the given reader/writer key
        pair, building and caching one on a miss.

        Args:
            reader_id_key: Key (and fallback schema id) of the reader schema.
            writer_id_key: Key (and fallback schema id) of the writer schema.
            reader_avro_schema: Optional reader schema; looked up by
                ``reader_id_key`` when not given.
            writer_avro_schema: Optional writer schema; looked up by
                ``writer_id_key`` when not given.
        """
        cache_key = (reader_id_key, writer_id_key)
        reader = self._reader_cache.get(cache_key)
        if not reader:
            reader_schema = (
                reader_avro_schema or self._get_avro_schema(reader_id_key)
            )
            writer_schema = (
                writer_avro_schema or self._get_avro_schema(writer_id_key)
            )
            reader = AvroStringReader(
                reader_schema=reader_schema,
                writer_schema=writer_schema
            )
            self._reader_cache[cache_key] = reader
        return reader
85 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/register/avro_command.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from data_pipeline.tools.introspector.register.base_command import _BaseRegisterCommand
20 | 
21 | 
class RegisterAvroCommand(_BaseRegisterCommand):
    """Register sub-command that sends an avro schema to the schematizer via
    ``register_schema`` and prints the resulting schema.
    """

    @classmethod
    def add_parser(cls, subparsers):
        """Add the ``avro`` sub-parser, its arguments and its ``command``
        default to ``subparsers``.
        """
        avro_parser = subparsers.add_parser(
            "avro",
            description="Register the given avro schema to the schematizer.",
            add_help=False
        )

        cls.add_base_arguments(avro_parser)

        avro_parser.add_argument(
            "--avro-schema",
            type=str,
            required=True,
            help="The json of the avro schema."
        )
        avro_parser.add_argument(
            "--cluster-type",
            dest="cluster_type",
            default='datapipe',
            help="Kafka cluster type to connect. Defaults to datapipe. "
                 "Currently only 'datapipe' and 'scribe' cluster types are "
                 "supported."
        )
        avro_parser.add_argument(
            "--base-schema-id",
            type=int,
            default=None,
            help="The id of the original schema the new avro schema was built upon."
        )

        def run_command(args):
            command = cls("data_pipeline_instropsector_register_avro")
            return command.run(args, avro_parser)

        avro_parser.set_defaults(command=run_command)

    def process_args(self, args, parser):
        """Run the base class argument processing, then copy the avro
        specific arguments onto the instance.
        """
        super(RegisterAvroCommand, self).process_args(args, parser)
        self.avro_schema = args.avro_schema
        self.cluster_type = args.cluster_type
        self.base_schema_id = args.base_schema_id

    def run(self, args, parser):
        """Process the arguments, register the avro schema and print the
        schema that comes back.
        """
        self.process_args(args, parser)
        registered_schema = self.schematizer.register_schema(
            namespace=self.namespace,
            source=self.source_name,
            schema_str=self.avro_schema,
            source_owner_email=self.source_owner_email,
            contains_pii=self.pii,
            cluster_type=self.cluster_type,
            base_schema_id=self.base_schema_id
        )
        self.print_schema(registered_schema)
80 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/models/consumer_group_data_source.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from collections import namedtuple
20 | 
21 | from data_pipeline.schematizer_clientlib.models.data_source_type_enum import DataSourceTypeEnum
22 | from data_pipeline.schematizer_clientlib.models.model_base import BaseModel
23 | 
24 | 
"""
Represent the data of the mapping between a data source and a consumer group.
A data source represents a namespace or a source.

Args:
    consumer_group_data_source_id (int): The id of the mapping between consumer
        group and data source.
    consumer_group_id (int): The id of the consumer group.
    data_source_type
    (data_pipeline.schematizer_clientlib.models.data_source_type_enum.DataSourceTypeEnum):
        The type of the data source.
    data_source_id: The id of the data source.  Depending on the data source
        type, it may be a namespace id or source id.
"""
ConsumerGroupDataSource = namedtuple(
    'ConsumerGroupDataSource',
    (
        'consumer_group_data_source_id',
        'consumer_group_id',
        'data_source_type',
        'data_source_id',
    )
)
44 | 
45 | 
class _ConsumerGroupDataSource(BaseModel):
    """Internal model for a consumer group / data source mapping.

    Converts a schematizer response into this model and this model into the
    public ``ConsumerGroupDataSource`` result tuple.
    """

    def __init__(self, consumer_group_data_source_id, consumer_group_id,
                 data_source_type, data_source_id):
        self.consumer_group_data_source_id = consumer_group_data_source_id
        self.consumer_group_id = consumer_group_id
        self.data_source_type = data_source_type
        self.data_source_id = data_source_id

    @classmethod
    def from_response(cls, response):
        """Build the model from a response object, mapping the response's
        ``data_source_type`` to its ``DataSourceTypeEnum`` member.
        """
        source_type = DataSourceTypeEnum[response.data_source_type]
        return cls(
            response.consumer_group_data_source_id,
            response.consumer_group_id,
            source_type,
            response.data_source_id
        )

    def to_result(self):
        """Return the public ``ConsumerGroupDataSource`` tuple for this model."""
        result = ConsumerGroupDataSource(
            consumer_group_data_source_id=self.consumer_group_data_source_id,
            consumer_group_id=self.consumer_group_id,
            data_source_type=self.data_source_type,
            data_source_id=self.data_source_id
        )
        return result
74 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/models/source.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from collections import namedtuple
20 | 
21 | from data_pipeline.schematizer_clientlib.models.model_base import BaseModel
22 | from data_pipeline.schematizer_clientlib.models.namespace import _Namespace
23 | 
24 | 
"""
Represent the data of a source.  A source is a sub-group under a namespace for
which an avro schema is created.  For example, `user` (table) could be a
source.

Args:
    source_id (int): The id of the source.
    name (str): The name of the source.
    owner_email (str): The email of the source owner.
    namespace (data_pipeline.schematizer_clientlib.models.namespace.Namespace):
        The namespace of the source.
    category (str): The category of the source. (e.g. Content, Deals etc.)
"""
Source = namedtuple(
    'Source',
    (
        'source_id',
        'name',
        'owner_email',
        'namespace',
        'category',
    )
)
41 | 
42 | 
class _Source(BaseModel):
    """Internal model for a source.

    Converts between schematizer responses, cache values and the public
    ``Source`` result tuple.
    """

    def __init__(self, source_id, name, owner_email, namespace, category):
        self.source_id = source_id
        self.name = name
        self.owner_email = owner_email
        self.namespace = namespace
        self.category = category

    @classmethod
    def from_response(cls, response):
        """Build the model from a response object; the nested namespace is
        converted with ``_Namespace.from_response``.
        """
        namespace = _Namespace.from_response(response.namespace)
        return cls(
            response.source_id,
            response.name,
            response.owner_email,
            namespace,
            response.category
        )

    def to_cache_value(self):
        """Return a dict of this model's attributes for caching.  The
        namespace is stored as the object held by this model.
        """
        cache_value = {}
        cache_value['source_id'] = self.source_id
        cache_value['name'] = self.name
        cache_value['owner_email'] = self.owner_email
        cache_value['namespace'] = self.namespace
        cache_value['category'] = self.category
        return cache_value

    @classmethod
    def from_cache_value(cls, cache_value):
        """Rebuild the model from a dict produced by ``to_cache_value``."""
        return cls(
            cache_value['source_id'],
            cache_value['name'],
            cache_value['owner_email'],
            cache_value['namespace'],
            cache_value['category']
        )

    def to_result(self):
        """Return the public ``Source`` tuple, converting the namespace with
        its own ``to_result``.
        """
        namespace_result = self.namespace.to_result()
        return Source(
            source_id=self.source_id,
            name=self.name,
            owner_email=self.owner_email,
            namespace=namespace_result,
            category=self.category
        )
89 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/models/consumer_group.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from collections import namedtuple
20 | 
21 | from data_pipeline.schematizer_clientlib.models.data_target import _DataTarget
22 | from data_pipeline.schematizer_clientlib.models.model_base import BaseModel
23 | 
24 | 
"""
Represent the data of a consumer group.  A consumer group is a group of
consumers that all send their messages to the same destination, which is
defined as a "data target".

Args:
    consumer_group_id (int): The id of the consumer group.
    group_name (str): The name of the consumer group.
    data_target (data_pipeline.schematizer_clientlib.models.data_target.DataTarget):
        The data_target this consumer group associates to.
"""
ConsumerGroup = namedtuple(
    'ConsumerGroup',
    (
        'consumer_group_id',
        'group_name',
        'data_target',
    )
)
40 | 
41 | 
class _ConsumerGroup(BaseModel):
    """Internal model for a consumer group.

    Converts between schematizer responses, cache values and the public
    ``ConsumerGroup`` result tuple.
    """

    def __init__(self, consumer_group_id, group_name, data_target):
        self.consumer_group_id = consumer_group_id
        self.group_name = group_name
        self.data_target = data_target

    @classmethod
    def from_response(cls, response):
        """Build the model from a response object; the nested data target is
        converted with ``_DataTarget.from_response``.
        """
        data_target = _DataTarget.from_response(response.data_target)
        return cls(
            response.consumer_group_id,
            response.group_name,
            data_target
        )

    def to_cache_value(self):
        """Return a dict for caching.  Only the id of the data target is
        stored, not the whole data target.
        """
        cache_value = {}
        cache_value['consumer_group_id'] = self.consumer_group_id
        cache_value['group_name'] = self.group_name
        cache_value['data_target_id'] = self.data_target.data_target_id
        return cache_value

    @classmethod
    def from_cache_value(cls, cache_value):
        """Rebuild the model from a dict produced by ``to_cache_value``.

        The cache holds only the data target id, so the rebuilt data target
        has ``name``, ``target_type`` and ``destination`` set to None.
        """
        placeholder_target = _DataTarget(
            data_target_id=cache_value['data_target_id'],
            name=None,
            target_type=None,
            destination=None
        )
        return cls(
            cache_value['consumer_group_id'],
            cache_value['group_name'],
            placeholder_target
        )

    def to_result(self):
        """Return the public ``ConsumerGroup`` tuple, converting the data
        target with its own ``to_result``.
        """
        target_result = self.data_target.to_result()
        return ConsumerGroup(
            consumer_group_id=self.consumer_group_id,
            group_name=self.group_name,
            data_target=target_result
        )
87 | 


--------------------------------------------------------------------------------
/key-1.key:
--------------------------------------------------------------------------------
 1 | MIIJKQIBAAKCAgEAgtOJ7j/3h+fynqWCV0C9EV0PMl7x6ht/QX9Ool6aGXuG7IOb
 2 | +PHhmpcPrSmoRaC2m3dJooLxzxAtIJ1JbNb9c55lvCg2ft/3d0CwwD/HMmCSQ3V7
 3 | QykcgDgqW3W9RWlUa6mb4ExxFowwx5TLa0snCVhPv8FjwLC5z7NqispjuSVdwgoQ
 4 | nY4fg4wpvH/ziHT2qkNBLP/y1xHmhzsaWWLxdFT4IBmHCWorQEmfB9WHwxEO510u
 5 | cjkOjSZ66ba6yLgsgY1VMsCYj/rQpQeZYgjk7ZwelGJyvz/lrH87piSRE70zReAj
 6 | GuGKEULYVPxsFT7xEhgwDbH6+8ZKo1Sz49DTgB9MtSOtI5UkSct1/Hv3SJoZXmDS
 7 | Avn0MlyOxAYhlX9FVS2placyksM6lPfRHUH9SndOK4OOLVhnLvKyY/F9pFXx6QP1
 8 | Ni0J+s5ylNBNl8dXbPBRtFRFgHnyZ85XDMQ0tDWULydkQEqbfuy0SWIgfS9ClBPo
 9 | 1nQNTNvlUp6re0CSz+I0siQ/ctl6F172/8m3TbNVf6FL7VKte6gksVWjeqDH66ww
10 | EyEQC7dLSGFiD/CFv/6k6zv5VZRrpjBISjvXg89h5BhWQ8bXqlbPdEGN0vgx9Kwm
11 | DV8eW3uQZenbepNdLOVIkDzKQZZ+q5C7FNiFM4dyzXQ3uMPcOw+p9nTXUwkCAwEA
12 | AQKCAgBbn174FqB68/y9Etjn1pUTNgNQmaH+/ObwrgIaQPrXYeWqermfbXc81xLJ
13 | iWUp7lY1JyPQxrgm9gNE1iL4MNEsSgCh99/Cl4QuZoXi/ke5hlypFfwPmxk0yyEP
14 | ELRPimbvR8e9HIwO6Hu5KwIT0Hw0VT704rHD34Eib1rsfLW+kubI4FemoER90ong
15 | VBPwu0mNELDynQYrjYPPrZrUgzDq8h8cDATAmTRVP7o6uFbuTGINsvq2UDNas1pd
16 | l/XUYPf6e0rx0mSGYXgCPAI14FjD+BSKgzAttMhbOHhD8VQRaO4hv8mWJ+KUGCZ9
17 | zpK7hms66umsjbti3QqkvMuHf1a/V9NwJ6Dsz41vAzqL4tGvbBrCd24nXeTMbMi8
18 | 0QDhzAMt+z2sB2E087pLyt4AOS9AGSVbR2jBzac0Fkw3J+MAxcKWLbMDiI6NxUoC
19 | olHFDsISpefxqYDkP/+tO4fQbyzwpeArQ3MhmYwpkGfKez8zIFZPzxwSdz6KvEso
20 | HniYA2uR8muXshC+CV0H2aERa8Fuw88RFdHqAB7uS8wiNWTGy1XhByvVBwtgSu2R
21 | usO7r6C3+RMNcj3+i1tBFMLVatzn6Vqo+z2jsJ3NtWExl1Q0UsIeeEu8iwURmopv
22 | 87INvTOoUssX1lS7fTxSCBmmMw6JN5kEGhL92CR4/E7DxEAMyQKCAQEA8cuAJ//e
23 | U8TFbCS6hr30qYDOx0vWZe2Ju1uVz6Sk5sKiKw5PuLnya46SIOY1zGVpT2IJCjut
24 | D2b+IgPateO50kClen7/DM49sBYp7KuFz3XZ7Oc4hq470mQ/wwmMc2nqY5DU4+wm
25 | 6LL0quGK6nBIWE52MYqXFdCXc23ymv5eb4J0qzu5OwLzSAGZtMBXyCH9sKtzcGHJ
26 | E8eOfYNhE8O0/O2dpjpOJJTBuPhQztPKEBLJ5mnlOKoFmzQbJJBo4uwufjkIzD/p
27 | z1P4T/YJheroJMrZOAAVDUZpYDcPK1wtleYINvPUvdru6lxA10wHPysBLHsnSlvu
28 | PJRDTgk+x1XuZwKCAQEAioMdVuAf3ylgvTDNEQCxFjAW7uQkDi9zjkNblRXn/Xh1
29 | dvqtH0SnDXuq0ZkE5Ce1Bh03DQnw7YL21VfL6B10Q+7c7hu04WsI6zoX8AB9s7Si
30 | Jqm5hD4YWjSshwH7Hgojufr1IgYTiIpAGCmJlhOnFRRxj/oLAnJgUqtx8Vr/JNG5
31 | OwZB6ZRmhjhlzODvIIAo0LwRw4KiPj729OpkGAaL3eaHYJdLlGP0xClr60acEK6I
32 | OoEyoFr8ywNsewOOUjO522y75V32YhrbeQdQpjVdQuVuxPSJaSj2VjnKDewLZASr
33 | 7HrNLhQ5qaXcwuUpkn+7eVSYey9Pcna8Mc467H/tDwKCAQEA5vDKV+n/7Jx9P5wY
34 | 98cInrjjRuD9Rt9I9MU14lxMEAeIIn2J0VLw4qAKnBek/LNDkUl+fuKemZ5yTgTG
35 | aYG3IsUSCFyxuAZoTv3sTxtIM+1JFweFKsXRZ1LOv7coCbf44d4cKBGO3CTIoUnI
36 | h8B0qLtQEmNfiLtxXBOAa19WzUSZsE7bzSw1NuooZ8ZqUUF977biDE2pYxXv15ub
37 | Jeh/vjApqfH4XuIh4Uxyygjo7s8zPbqY77Uco69x2UuAknSot3A8IfFNnPODsMO7
38 | Nsg1r6Z7MAyMDuDYlsgf2fcxpwFNiUb8Q9WO6hLw73GzXjbx6Q07ar9BXWTyn98e
39 | sf82MwKCAQBj8FJc+n1EkSBhoTJ00CD1t2D0oNt6+LXRsbwEh7fJZP6sCBaddvVp
40 | fw8zB8tFq3irnqy31bJmTJY4PI69PXNECLaTfp41/vqCbnoTCuenn+9XIiLezcbY
41 | XyUUjFNZy0sXx4DTObsiS04PoPjKtgdZ6FQ/49PJwijJvTYLRPLO6BtUASHRnOeU
42 | dGDPQISI2K+aW/gdLsZNTzT0ZaIuy5pjUw/em4tG2BAk8RYSvfGg/6z/OXUDymKX
43 | QDMnVtt7aCLztTKlPfSluGceni6MnfhaahxQM3hpzcpfUHb/MI7Pbad2iuw8EQ/B
44 | 3hC49ovWTh9AXc9fIjWaix0ieoB9X5f1AoIBAQCp9ALAlnj4rra9SRou7uxCLYSW
45 | IYeqLEkBhnOJJHHekEwFimRwRm9LxSMoYHtbuZByuZBo7bwp2wUj6wrtn1xzsB/r
46 | 3KYX0Z+hDksLhBUQpTeiVAej6BbQUaOiNJ0M++EskpcM9J34nNBbWxk/zB/bFrRA
47 | MC7GGGQ6lXlj/e8YWHfUuLijGF7q9FUfLRAcCDwkoDdfnO/fEKRcKrDBc8ezGTa5
48 | WNLlA7a4nhw9JZ+rTK9wL4PH4GXxSTVkhk/NP/YMSwriGVfeLnWBpW+T/HNarHIk
49 | 3j2M4/b72cIAtQtZfwRR8WZ5JXs8ZIn5UN4kM0MvoVTDdYh0ieLevuIfqA08
50 | 


--------------------------------------------------------------------------------
/tests/helpers/decorators_test.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright 2016 Yelp Inc.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #   http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing,
 11 | # software distributed under the License is distributed on an
 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 13 | # KIND, either express or implied.  See the License for the
 14 | # specific language governing permissions and limitations
 15 | # under the License.
 16 | from __future__ import absolute_import
 17 | from __future__ import unicode_literals
 18 | 
 19 | import tempfile
 20 | 
 21 | import pytest
 22 | 
 23 | from data_pipeline.helpers.decorators import memoized
 24 | 
 25 | 
 26 | def fibonacci(n):
 27 |     if n <= 0:
 28 |         return 0
 29 |     if n == 1:
 30 |         return 1
 31 |     else:
 32 |         return fibonacci(n - 1) + fibonacci(n - 2)
 33 | 
 34 | 
 35 | @memoized
 36 | def fast_fibonacci(n):
 37 |     if n <= 0:
 38 |         return 0
 39 |     if n == 1:
 40 |         return 1
 41 |     else:
 42 |         return fast_fibonacci(n - 1) + fast_fibonacci(n - 2)
 43 | 
 44 | 
@memoized
def identity(x):
    """Return ``x`` unchanged.  Memoized so the tests can call the decorator
    with hashable, unhashable and uncacheable arguments.
    """
    return x
 48 | 
 49 | 
 50 | class TestMemoized(object):
 51 |     """Ensure memoization decorator behaves per its specification"""
 52 | 
 53 |     def test_basic(self):
 54 |         """Basic correctness tests"""
 55 |         assert identity((1,)) == (1,)
 56 |         assert fibonacci(1) == fast_fibonacci(1)
 57 |         assert fibonacci(2) == fast_fibonacci(2)
 58 |         assert fibonacci(3) == fast_fibonacci(3)
 59 |         assert fibonacci(10) == fast_fibonacci(10)
 60 | 
 61 |     def test_unhashable_args(self):
 62 |         """The memoization decorator should even work with
 63 |         common unhashable arguments ..."""
 64 |         assert identity([1]) == [1]
 65 |         assert identity(set([1])) == set([1])
 66 |         assert identity({'a': 1}) == {'a': 1}
 67 | 
 68 |     def test_uncacheable_args(self):
 69 |         """... but might not work with all unhashable objects."""
 70 |         f = tempfile.NamedTemporaryFile()
 71 |         with pytest.raises(TypeError):
 72 |             identity(f)
 73 | 
 74 |     def test_performance(self):
 75 |         """Ensure that the memoization decorator actually saves
 76 |         function calls"""
 77 | 
 78 |         @memoized
 79 |         def my_identity(x, sheep=False):
 80 |             my_identity.num_calls += 1
 81 |             if sheep:
 82 |                 return "sheep"
 83 |             else:
 84 |                 return x
 85 | 
 86 |         my_identity.num_calls = 0
 87 | 
 88 |         assert my_identity(1) == 1
 89 |         assert my_identity(1) == 1
 90 |         assert my_identity(1) == 1
 91 |         assert my_identity(2) == 2
 92 |         assert my_identity(2) == 2
 93 |         assert my_identity(2) == 2
 94 | 
 95 |         assert my_identity.num_calls == 2
 96 | 
 97 |         # Ensure kwargs work
 98 |         assert my_identity(1, sheep=True) == "sheep"
 99 |         assert my_identity(1, sheep=True) == "sheep"
100 |         assert my_identity(2, sheep=True) == "sheep"
101 |         assert my_identity(2, sheep=True) == "sheep"
102 | 
103 |         assert my_identity.num_calls == 4
104 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/info/namespace.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | import simplejson
20 | 
21 | from data_pipeline.tools.introspector.base_command import IntrospectorCommand
22 | from data_pipeline.tools.introspector.models import IntrospectorNamespace
23 | 
24 | 
class NamespaceInfoCommand(IntrospectorCommand):
    """Introspector sub-command that prints, as JSON, information about a
    single data pipeline namespace looked up by name.
    """

    @classmethod
    def add_parser(cls, subparsers):
        """Add the ``namespace`` sub-parser, its arguments and its
        ``command`` default to ``subparsers``.
        """
        info_command_parser = subparsers.add_parser(
            "namespace",
            description="Get information on a specific data pipeline namespace.",
            add_help=False
        )

        info_command_parser.add_argument(
            '--active-namespaces',
            default=False,
            action='store_true',
            help=(
                'If set, this command will also return information about active '
                'sources and topics for this namespace. '
                'This is a time expensive operation.'
            )
        )

        cls.add_base_arguments(info_command_parser)

        info_command_parser.add_argument(
            "namespace_name",
            type=str,
            help="Name of namespace to retrieve information on."
        )

        info_command_parser.set_defaults(
            command=lambda args: cls("data_pipeline_instropsector_info_namespace").run(
                args,
                info_command_parser
            )
        )

    def info_namespace(self, name, active_namespaces=False):
        """Return an ordered dict describing the namespace called ``name``.

        Args:
            name: Name of the namespace to look up among the namespaces
                returned by the schematizer.
            active_namespaces: When true, ``self.active_namespaces`` is
                passed on to ``IntrospectorNamespace``; otherwise None is.
                (``self.active_namespaces`` is not defined in this class --
                presumably provided by the base command; verify there.)

        Returns:
            The namespace's ordered dict with an added ``sources`` entry
            from ``self.list_sources``.

        Raises:
            ValueError: If no namespace with the given name exists.
        """
        matching_namespace = None
        for candidate in self.schematizer.get_namespaces():
            if candidate.name == name:
                matching_namespace = candidate
                break
        if not matching_namespace:
            raise ValueError("Given namespace doesn't exist")

        namespace_info = IntrospectorNamespace(
            matching_namespace,
            active_namespaces=(self.active_namespaces if active_namespaces else None)
        ).to_ordered_dict()
        namespace_info['sources'] = self.list_sources(
            namespace_name=namespace_info['name']
        )
        return namespace_info

    def process_args(self, args, parser):
        """Run the base class argument processing, then store the requested
        namespace name on the instance.
        """
        super(NamespaceInfoCommand, self).process_args(args, parser)
        self.namespace_name = args.namespace_name

    def run(self, args, parser):
        """Process the arguments and print the namespace information as JSON."""
        self.process_args(args, parser)
        # Parenthesized single-argument form prints identically under the
        # Python 2 print statement and is valid Python 3 syntax as well.
        print(simplejson.dumps(
            self.info_namespace(
                self.namespace_name,
                active_namespaces=args.active_namespaces
            )
        ))
91 | 


--------------------------------------------------------------------------------
/data_pipeline/schematizer_clientlib/models/avro_schema_element.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | from __future__ import absolute_import
17 | from __future__ import unicode_literals
18 | 
19 | from collections import namedtuple
20 | 
21 | from data_pipeline.schematizer_clientlib.models.model_base import BaseModel
22 | from data_pipeline.schematizer_clientlib.models.note import _Note
23 | 
24 | 
"""
Represent the data of an Avro schema element.

Args:
    id (int): The element id.
    schema_id (int): The id of the avro schema.
    element_type (): The data type of the element
    element_name (str): The column corresponding to the key of the AvroSchemaElement
    doc ():
    note (Optional[data_pipeline.schematizer_clientlib.models.note.Note]): Information specified by users about the schema.
    created_at (str): The timestamp when the schema is created in ISO-8601
        format.
    updated_at (str): The timestamp when the schema is last updated in ISO-8601
        format.
"""

# Immutable result type; the field order below is the positional order.
AvroSchemaElement = namedtuple(
    'AvroSchemaElement',
    (
        'id',
        'schema_id',
        'element_type',
        'element_name',
        'doc',
        'note',
        'created_at',
        'updated_at',
    )
)

# Separator between the parts of a schema element key.
_SCHEMA_KEY_DELIMITER = '|'
48 | 
49 | 
class _AvroSchemaElement(BaseModel):
    """Internal counterpart of ``AvroSchemaElement``.

    Converts schematizer responses into instances of this class and turns
    those instances into the public ``AvroSchemaElement`` tuples.
    """

    def __init__(self, id, schema_id, element_type, key, doc, note,
                 created_at, updated_at):
        # The element name is the second delimiter-separated part of the
        # key; keys without a second part leave it as None.
        key_parts = key.split(_SCHEMA_KEY_DELIMITER)
        self.element_name = key_parts[1] if len(key_parts) >= 2 else None
        self.id = id
        self.schema_id = schema_id
        self.element_type = element_type
        self.doc = doc
        self.note = note
        self.created_at = created_at
        self.updated_at = updated_at

    @classmethod
    def from_response(cls, response_lst):
        """Return a list with one instance per response in ``response_lst``."""
        return [
            cls(
                id=item.id,
                schema_id=item.schema_id,
                element_type=item.element_type,
                key=item.key,
                doc=item.doc,
                note=_Note.from_response(item.note),
                created_at=item.created_at,
                updated_at=item.updated_at
            )
            for item in response_lst
        ]

    def to_result(self):
        """Return the public ``AvroSchemaElement`` tuple for this element."""
        note_result = None if self.note is None else self.note.to_result()
        return AvroSchemaElement(
            id=self.id,
            schema_id=self.schema_id,
            element_type=self.element_type,
            element_name=self.element_name,
            doc=self.doc,
            note=note_result,
            created_at=self.created_at,
            updated_at=self.updated_at
        )
98 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/binlog_analyzer.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing,
11 | # software distributed under the License is distributed on an
12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, either express or implied.  See the License for the
14 | # specific language governing permissions and limitations
15 | # under the License.
16 | """Use this like:
17 | mysqlbinlog --read-from-remote-server --host 10.69.1.100 -u rbr_test \
18 |     --stop-never --verbose --start-datetime="2015-03-08 00:45:00" \
19 |     mysql-bin.000405 | ~/pypy-2.5.0-linux64/bin/pypy ~/binlog_analyzer.py \
20 |     | ~/pypy-2.5.0-linux64/bin/pypy ~/compressed_stream_rotator.py
21 | """
22 | from __future__ import absolute_import
23 | from __future__ import unicode_literals
24 | 
25 | import datetime
26 | import errno
27 | import fileinput
28 | import json
29 | import re
30 | import time
31 | 
32 | 
class BinlogParser(object):
    """Turn verbose ``mysqlbinlog`` output into one JSON line per row change.

    Lines are read through ``fileinput`` (files named on the command line,
    or standard input).  Row-event header lines update the current event
    timestamp, and every ``### INSERT INTO/UPDATE/DELETE FROM`` line emits
    a JSON object with that timestamp, the statement type and the table.
    """

    statement_to_type = {'INSERT INTO': 'insert', 'UPDATE': 'update', 'DELETE FROM': 'delete'}

    # Patterns are compiled once instead of being looked up for every line.
    _header_datetime_re = re.compile(r"\#(\d+)\s+(\d+:\d+:\d+)\s+server\s+id\s+\d+")
    _row_event_header_re = re.compile(
        r"\#(\d+)\s+(\d+:\d+:\d+)\s+server\s+id\s+\d+.+(Update_rows|Write_rows|Delete_rows)"
    )
    _set_timestamp_re = re.compile(r"SET\ TIMESTAMP=(\d+)/\*!\*/;")
    _row_statement_re = re.compile(r"\#\#\#\ (DELETE\ FROM|INSERT\ INTO|UPDATE)\ (.+)")

    def __init__(self):
        # Value of the most recent "SET TIMESTAMP=" line (tracked, not emitted).
        self.timestamp = None
        # Epoch seconds parsed from the most recent row-event header line.
        self.header_timestamp = None

    def run(self):
        """Parse all input, stopping quietly if the output pipe is closed."""
        try:
            self._parse_binlog()
        except IOError as e:
            # A broken pipe only means the downstream reader went away;
            # anything else is a real error.
            if e.errno != errno.EPIPE:
                raise

    def _parse_binlog(self):
        """Feed every stripped input line to ``_process_line``."""
        for line in fileinput.input():
            self._process_line(line.strip())

    def _process_line(self, line):
        """Dispatch a single stripped line to the matching handler."""
        if self._is_setting_timestamp(line):
            self._handle_timestamp_line(line)
        if self._is_header_line(line):
            self._handle_header_line(line)
        elif self._is_updating(line):
            self._handle_update_line(line)

    def _is_setting_timestamp(self, line):
        """Return True for ``SET TIMESTAMP=...`` lines ending in ``/*!*/;``."""
        return line.startswith("SET TIMESTAMP=") and line.endswith("/*!*/;")

    def _handle_header_line(self, line):
        """Store the header's date and time as epoch seconds (local time)."""
        m = self._header_datetime_re.search(line)
        datetime_str = "%s %s" % (m.group(1), m.group(2))
        dt = datetime.datetime.strptime(datetime_str, '%y%m%d %H:%M:%S')
        self.header_timestamp = int(time.mktime(dt.timetuple()))

    def _is_header_line(self, line):
        """Return True for header lines of Update/Write/Delete row events."""
        return self._row_event_header_re.search(line) is not None

    def _handle_timestamp_line(self, line):
        """Store the integer value of a ``SET TIMESTAMP=`` line."""
        m = self._set_timestamp_re.search(line)
        self.timestamp = int(m.group(1))

    def _is_updating(self, line):
        """Return True when the line starts a decoded row statement."""
        # The recognised statements are the keys of ``statement_to_type``,
        # so the two can no longer drift apart.
        return line.startswith(
            tuple("### %s " % statement for statement in self.statement_to_type)
        )

    def _handle_update_line(self, line):
        """Print a JSON record describing the row statement on ``line``."""
        m = self._row_statement_re.search(line)
        statement_type = self.statement_to_type[m.group(1)]
        table = m.group(2)

        # Parenthesised single-argument form behaves the same as the print
        # statement on Python 2 and is also valid on Python 3.
        print(json.dumps({
            'timestamp': self.header_timestamp,
            'statement_type': statement_type,
            'table': table
        }))
95 | 
96 | 
# Script entry point: parse the binlog text and print one JSON line per row
# statement (see the module docstring for an example pipeline).
if __name__ == "__main__":
    BinlogParser().run()
99 | 


--------------------------------------------------------------------------------
/tests/client_test.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright 2016 Yelp Inc.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #   http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing,
 11 | # software distributed under the License is distributed on an
 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 13 | # KIND, either express or implied.  See the License for the
 14 | # specific language governing permissions and limitations
 15 | # under the License.
 16 | from __future__ import absolute_import
 17 | from __future__ import unicode_literals
 18 | 
 19 | import mock
 20 | import pytest
 21 | 
 22 | from data_pipeline.client import Client
 23 | from data_pipeline.expected_frequency import ExpectedFrequency
 24 | 
 25 | 
class ClientTester(Client):
    """Minimal ``Client`` subclass for these tests; its client type is 'tester'."""

    @property
    def client_type(self):
        return 'tester'
 30 | 
 31 | 
 32 | @pytest.mark.usefixtures('configure_teams')
 33 | class TestClient(object):
 34 |     @property
 35 |     def client_name(self):
 36 |         return 'test_client'
 37 | 
 38 |     @property
 39 |     def team_name(self):
 40 |         return 'bam'
 41 | 
 42 |     @property
 43 |     def expected_frequency_seconds(self):
 44 |         return 0
 45 | 
 46 |     def _build_client(self, **override_kwargs):
 47 |         args = dict(
 48 |             client_name=self.client_name,
 49 |             team_name=self.team_name,
 50 |             expected_frequency_seconds=self.expected_frequency_seconds,
 51 |             monitoring_enabled=False
 52 |         )
 53 |         args.update(override_kwargs)
 54 |         return ClientTester(**args)
 55 | 
 56 |     def test_default_client_is_valid(self):
 57 |         self._assert_valid(self._build_client())
 58 | 
 59 |     def test_string_client_name_is_valid(self):
 60 |         name = str("test_client")
 61 |         assert self._build_client(client_name=name).client_name == name
 62 | 
 63 |     def test_non_string_client_name(self):
 64 |         self._assert_invalid(client_name=1)
 65 | 
 66 |     def test_empty_client_name(self):
 67 |         self._assert_invalid(client_name='')
 68 | 
 69 |     def test_invalid_team_name(self):
 70 |         self._assert_invalid(team_name='bogus_team')
 71 | 
 72 |     def test_negative_expected_frequency_seconds(self):
 73 |         self._assert_invalid(expected_frequency_seconds=-1)
 74 | 
 75 |     def test_expected_frequency_seconds_constant_is_valid(self):
 76 |         client = self._build_client(
 77 |             expected_frequency_seconds=ExpectedFrequency.constantly
 78 |         )
 79 |         assert client.expected_frequency_seconds == 0
 80 | 
 81 |     def _assert_invalid(self, **client_kwargs):
 82 |         with pytest.raises(ValueError):
 83 |             self._build_client(**client_kwargs)
 84 | 
 85 |     def _assert_valid(self, client):
 86 |         assert client.client_name == self.client_name
 87 |         assert client.team_name == self.team_name
 88 |         assert client.expected_frequency_seconds == self.expected_frequency_seconds
 89 | 
 90 |     @pytest.mark.parametrize("method, skipped_method, kwargs", [
 91 |         ('record_message', '_get_record', {'message': None}),
 92 |         ('close', 'flush_buffered_info', {}),
 93 |     ])
 94 |     def test_method_call_with_disabled_monitoring(self, method, skipped_method, kwargs):
 95 |         client = self._build_client(
 96 |             expected_frequency_seconds=ExpectedFrequency.constantly
 97 |         )
 98 |         with mock.patch.object(client.monitor, skipped_method) as uncalled_method:
 99 |             getattr(client.monitor, method)(**kwargs)
100 |             assert uncalled_method.called == 0
101 | 


--------------------------------------------------------------------------------
/data_pipeline/tools/introspector/info/source.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright 2016 Yelp Inc.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #   http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing,
 11 | # software distributed under the License is distributed on an
 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 13 | # KIND, either express or implied.  See the License for the
 14 | # specific language governing permissions and limitations
 15 | # under the License.
 16 | from __future__ import absolute_import
 17 | from __future__ import unicode_literals
 18 | 
 19 | import simplejson
 20 | 
 21 | from data_pipeline.tools.introspector.base_command import IntrospectorCommand
 22 | from data_pipeline.tools.introspector.models import IntrospectorSource
 23 | 
 24 | 
 25 | class SourceInfoCommand(IntrospectorCommand):
 26 |     @classmethod
 27 |     def add_parser(cls, subparsers):
 28 |         info_command_parser = subparsers.add_parser(
 29 |             "source",
 30 |             description="Get information on a specific data pipeline source.",
 31 |             add_help=False
 32 |         )
 33 | 
 34 |         info_command_parser.add_argument(
 35 |             '--active-sources',
 36 |             default=False,
 37 |             action='store_true',
 38 |             help=(
 39 |                 'If set, this command will also return information about active '
 40 |                 'topics for this source. '
 41 |                 'This is a time expensive operation.'
 42 |             )
 43 |         )
 44 | 
 45 |         cls.add_base_arguments(info_command_parser)
 46 |         cls.add_source_and_namespace_arguments(info_command_parser)
 47 | 
 48 |         info_command_parser.set_defaults(
 49 |             command=lambda args: cls("data_pipeline_instropsector_info_source").run(
 50 |                 args,
 51 |                 info_command_parser
 52 |             )
 53 |         )
 54 | 
 55 |     def process_args(self, args, parser):
 56 |         super(SourceInfoCommand, self).process_args(args, parser)
 57 |         self.process_source_and_namespace_args(args, parser)
 58 | 
 59 |     def info_source(
 60 |         self,
 61 |         source_id=None,
 62 |         source_name=None,
 63 |         namespace_name=None,
 64 |         active_sources=False
 65 |     ):
 66 |         info_source = None
 67 |         if source_id:
 68 |             info_source = self.schematizer.get_source_by_id(source_id)
 69 |         else:
 70 |             sources = self.schematizer.get_sources_by_namespace(namespace_name)
 71 |             for source in sources:
 72 |                 if source.name == source_name:
 73 |                     info_source = source
 74 |                     break
 75 |             if not info_source:
 76 |                 raise ValueError("Given SOURCE_NAME|NAMESPACE_NAME doesn't exist")
 77 |         info_source = IntrospectorSource(
 78 |             info_source
 79 |         ).to_ordered_dict()
 80 |         topics = self.list_topics(
 81 |             source_id=info_source["source_id"]
 82 |         )
 83 |         if active_sources:
 84 |             info_source['active_topic_count'] = len(
 85 |                 [topic for topic in topics if topic['message_count']]
 86 |             )
 87 |         info_source['topics'] = topics
 88 |         return info_source
 89 | 
 90 |     def run(self, args, parser):
 91 |         self.process_args(args, parser)
 92 |         print simplejson.dumps(
 93 |             self.info_source(
 94 |                 source_id=self.source_id,
 95 |                 source_name=self.source_name,
 96 |                 namespace_name=self.namespace,
 97 |                 active_sources=args.active_sources
 98 |             )
 99 |         )
100 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.rst:
--------------------------------------------------------------------------------
  1 | ============
  2 | Contributing
  3 | ============
  4 | 
  5 | Contributions are welcome, and they are greatly appreciated! Every
  6 | little bit helps, and credit will always be given.
  7 | 
  8 | You can contribute in many ways:
  9 | 
 10 | Types of Contributions
 11 | ----------------------
 12 | 
 13 | Report Bugs
 14 | ~~~~~~~~~~~
 15 | 
 16 | Report bugs at https://jira.yelpcorp.com/browse/DATAPIPE/,
 17 | on the DATAPIPE project.
 18 | 
 19 | If you are reporting a bug, please include:
 20 | 
 21 | * Any details about your local setup that might be helpful in troubleshooting.
 22 | * Detailed steps to reproduce the bug.
 23 | 
 24 | Fix Bugs or Implement Features
 25 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 26 | 
 27 | Look through the Jira issues. Anything
 28 | is open to whoever wants to implement it.
 29 | 
 30 | Write Documentation
 31 | ~~~~~~~~~~~~~~~~~~~
 32 | 
 33 | Data Pipeline Clientlib could always use more documentation, whether as part of the
 34 | official Data Pipeline Clientlib docs, in docstrings, or even on trac.
 35 | 
 36 | Submit Feedback
 37 | ~~~~~~~~~~~~~~~
 38 | 
 39 | The best way to send feedback is to file an issue at
 40 | https://jira.yelpcorp.com/browse/DATAPIPE/ on the
 41 | DATAPIPE project.
 42 | 
 43 | If you are proposing a feature:
 44 | 
 45 | * Explain in detail how it would work.
 46 | * Keep the scope as narrow as possible, to make it easier to implement.
 47 | * Remember that contributions are welcome :)
 48 | 
 49 | Get Started!
 50 | ------------
 51 | 
 52 | Ready to contribute? Here's how to set up `data_pipeline` for
 53 | local development.
 54 | 
 55 | 1. Clone the `data_pipeline` repo::
 56 | 
 57 |     $ git clone git@git.yelpcorp.com:clientlibs/data_pipeline
 58 | 
 59 | 2. Create a branch for local development::
 60 | 
 61 |     $ git checkout -b name-of-your-bugfix-or-feature
 62 | 
 63 | Now you can make your changes locally.
 64 | 
 65 | See :doc:`index` for information about setting up TDD tools.
 66 | 
 67 | 3. When you're done making changes, check that your changes pass style and unit
 68 |    tests, including testing other Python versions with tox::
 69 | 
 70 |     $ tox
 71 | 
 72 | To get tox, just pip install it.
 73 | 
 74 | 4. Commit your changes and push your branch::
 75 | 
 76 |     $ git add .
 77 |     $ git commit -m "Your detailed description of your changes."
 78 |     $ git push origin name-of-your-bugfix-or-feature
 79 | 
 80 | 5. Get a code review::
 81 | 
 82 |     $ review-branch
 83 | 
 84 | Contribution Guidelines
 85 | -----------------------
 86 | 
 87 | Before you submit a pull request, check that it meets these guidelines:
 88 | 
 89 | 1. The change should include tests.
 90 | 2. If the change adds functionality, the docs should be updated. Put
 91 |    your new functionality into a function with a docstring, and add the
 92 |    feature to the list in README.rst.
 93 | 3. The pull request should work for Python 2.6, 2.7, and 3.3, and for PyPy.
 94 |    Run the ``tox`` command and make sure that the tests pass for all supported
 95 |    Python versions.
 96 | 
 97 | Building Tools
 98 | --------------
 99 | 
100 | Before you submit a pull request, make sure that any new tool meets these guidelines:
101 | 
1. Your tool is runnable from the command line
   (with python or on its own) in the data_pipeline/tools folder.
2. Any Python libraries your tool requires are listed in both
   requirements-tools.txt and in setup.py in extras_require["tools"].
3. In bin, create a script that can be run from the command line that runs your tool.
   See the others in the folder for an example of what to do with a python batch.
   Put the path to this new script in setup.py under scripts.
109 | 4. In debian/data-pipeline-tools.links create a new link. Use the others there as an example.
5. In yelp_package/itest/ubuntu.sh, add a line to the string SCRIPTS
   with the name of the script you put in bin.
112 | 
113 | Tips
114 | ----
115 | 
116 | To run a subset of tests::
117 | 
118 |      $ py.test tests/data_pipeline_test.py
119 | 


--------------------------------------------------------------------------------
/tests/_retry_util_test.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright 2016 Yelp Inc.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #   http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing,
 11 | # software distributed under the License is distributed on an
 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 13 | # KIND, either express or implied.  See the License for the
 14 | # specific language governing permissions and limitations
 15 | # under the License.
 16 | from __future__ import absolute_import
 17 | from __future__ import unicode_literals
 18 | 
 19 | import mock
 20 | import pytest
 21 | 
 22 | from data_pipeline._retry_util import ExpBackoffPolicy
 23 | from data_pipeline._retry_util import MaxRetryError
 24 | from data_pipeline._retry_util import Predicate
 25 | from data_pipeline._retry_util import retry_on_condition
 26 | from data_pipeline._retry_util import RetryPolicy
 27 | 
 28 | 
 29 | # TODO(DATAPIPE-368|clin): add unit tests for rest of the retry module
 30 | 
 31 | class TestRetryOnCondition(object):
 32 | 
 33 |     def always_true(self):
 34 |         return True
 35 | 
 36 |     def always_false(self):
 37 |         return False
 38 | 
 39 |     @pytest.fixture
 40 |     def return_true_then_false_func(self):
 41 |         return mock.Mock(side_effect=(True, False))
 42 | 
 43 |     @pytest.fixture
 44 |     def number_sequence_func(self):
 45 |         return mock.Mock(side_effect=(i + 1 for i in xrange(10)))
 46 | 
 47 |     @property
 48 |     def max_retry_count(self):
 49 |         return 3
 50 | 
 51 |     @pytest.fixture
 52 |     def exp_backoff_with_max_retry_count_policy(self):
 53 |         return RetryPolicy(
 54 |             ExpBackoffPolicy(
 55 |                 initial_delay_secs=0.1,
 56 |                 max_delay_secs=0.5,
 57 |                 backoff_factor=2
 58 |             ),
 59 |             max_retry_count=self.max_retry_count,
 60 |         )
 61 | 
 62 |     def test_no_retry(
 63 |         self,
 64 |         number_sequence_func,
 65 |         exp_backoff_with_max_retry_count_policy
 66 |     ):
 67 |         actual = retry_on_condition(
 68 |             retry_policy=exp_backoff_with_max_retry_count_policy,
 69 |             retry_conditions=[Predicate(self.always_false)],
 70 |             func_to_retry=number_sequence_func
 71 |         )
 72 |         assert actual == 1
 73 |         assert number_sequence_func.call_count == 1
 74 | 
 75 |     def test_exceed_max_retry_count(
 76 |         self,
 77 |         number_sequence_func,
 78 |         exp_backoff_with_max_retry_count_policy
 79 |     ):
 80 |         with pytest.raises(MaxRetryError) as e, mock.patch.object(
 81 |             exp_backoff_with_max_retry_count_policy.backoff_policy,
 82 |             'next_backoff_delay',
 83 |             return_value=0.1
 84 |         ) as next_backoff_delay_spy:
 85 |             retry_on_condition(
 86 |                 retry_policy=exp_backoff_with_max_retry_count_policy,
 87 |                 retry_conditions=[Predicate(self.always_true)],
 88 |                 func_to_retry=number_sequence_func
 89 |             )
 90 |         assert number_sequence_func.call_count == self.max_retry_count + 1
 91 |         assert e.value.last_result == 4
 92 |         assert next_backoff_delay_spy.call_count == self.max_retry_count
 93 | 
 94 |     def test_use_previous_result_as_params_in_retry(
 95 |         self,
 96 |         return_true_then_false_func,
 97 |         exp_backoff_with_max_retry_count_policy
 98 |     ):
 99 |         actual = retry_on_condition(
100 |             retry_policy=exp_backoff_with_max_retry_count_policy,
101 |             retry_conditions=[Predicate(return_true_then_false_func)],
102 |             func_to_retry=lambda i: i + i + i,
103 |             use_previous_result_as_param=True,
104 |             i=1
105 |         )
106 |         assert actual == 9
107 | 


--------------------------------------------------------------------------------
/tests/envelope_test.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright 2016 Yelp Inc.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #   http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing,
 11 | # software distributed under the License is distributed on an
 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 13 | # KIND, either express or implied.  See the License for the
 14 | # specific language governing permissions and limitations
 15 | # under the License.
 16 | from __future__ import absolute_import
 17 | from __future__ import unicode_literals
 18 | 
 19 | import pytest
 20 | 
 21 | from data_pipeline import message as dp_message
 22 | from data_pipeline.envelope import Envelope
 23 | from data_pipeline.meta_attribute import MetaAttribute
 24 | 
 25 | 
 26 | class TestEnvelope(object):
 27 | 
 28 |     @pytest.fixture
 29 |     def envelope(self):
 30 |         return Envelope()
 31 | 
 32 |     @pytest.fixture(params=[
 33 |         None,
 34 |         {'good_payload': 26}
 35 |     ])
 36 |     def meta_attr_payload_data(self, request):
 37 |         return request.param
 38 | 
 39 |     @pytest.fixture
 40 |     def valid_meta(
 41 |         self,
 42 |         meta_attr_payload_data,
 43 |         registered_meta_attribute_schema
 44 |     ):
 45 |         if meta_attr_payload_data is None:
 46 |             return None
 47 |         meta_attr = MetaAttribute(
 48 |             schema_id=registered_meta_attribute_schema.schema_id,
 49 |             payload_data=meta_attr_payload_data
 50 |         )
 51 |         return [meta_attr]
 52 | 
 53 |     @pytest.fixture
 54 |     def meta_attr_param(self, valid_meta):
 55 |         return {'meta': valid_meta}
 56 | 
 57 |     @pytest.fixture(params=[
 58 |         (dp_message.CreateMessage, {}),
 59 |         (dp_message.RefreshMessage, {}),
 60 |         (dp_message.DeleteMessage, {}),
 61 |         (dp_message.UpdateMessage, {'previous_payload': bytes(20)})
 62 |     ])
 63 |     def message(self, request, registered_schema, payload, meta_attr_param):
 64 |         message_class, additional_params = request.param
 65 |         if meta_attr_param:
 66 |             additional_params.update(meta_attr_param)
 67 |         return message_class(
 68 |             schema_id=registered_schema.schema_id,
 69 |             payload=payload,
 70 |             **additional_params
 71 |         )
 72 | 
 73 |     @pytest.fixture
 74 |     def expected_unpacked_message(self, message):
 75 |         previous_payload = None
 76 |         if isinstance(message, dp_message.UpdateMessage):
 77 |             previous_payload = message.previous_payload
 78 |         return dict(
 79 |             encryption_type=message.encryption_type,
 80 |             message_type=message.message_type.name,
 81 |             meta=[
 82 |                 meta_attr.avro_repr
 83 |                 for meta_attr in message.meta
 84 |             ] if message.meta else None,
 85 |             payload=message.payload,
 86 |             previous_payload=previous_payload,
 87 |             schema_id=message.schema_id,
 88 |             timestamp=message.timestamp,
 89 |             uuid=message.uuid
 90 |         )
 91 | 
 92 |     def test_pack_create_bytes(self, message, envelope):
 93 |         assert isinstance(envelope.pack(message), bytes)
 94 | 
 95 |     def test_pack_create_str(self, message, envelope):
 96 |         assert isinstance(envelope.pack(message, ascii_encoded=True), str)
 97 | 
 98 |     def test_pack_unpack(self, message, envelope, expected_unpacked_message):
 99 |         unpacked = envelope.unpack(envelope.pack(message))
100 |         assert unpacked == expected_unpacked_message
101 | 
102 |     def test_pack_unpack_ascii(self, message, envelope, expected_unpacked_message):
103 |         unpacked = envelope.unpack(envelope.pack(message, ascii_encoded=True))
104 |         assert unpacked == expected_unpacked_message
105 | 


--------------------------------------------------------------------------------