├── .cane ├── .gitignore ├── .rspec ├── .travis.yml ├── Gemfile ├── README.md ├── Rakefile ├── bin └── pipely ├── graphs └── .DS_Store ├── lib ├── pipely.rb └── pipely │ ├── actions.rb │ ├── actions │ ├── graph_file_pipeline.rb │ ├── graph_live_pipeline.rb │ └── list_live_pipelines.rb │ ├── build.rb │ ├── build │ ├── daily_scheduler.rb │ ├── definition.rb │ ├── environment_config.rb │ ├── hourly_scheduler.rb │ ├── right_now_scheduler.rb │ ├── s3_path_builder.rb │ ├── template.rb │ └── template_helpers.rb │ ├── bundler.rb │ ├── bundler │ ├── bundle.rb │ ├── gem_packager.rb │ └── project_gem.rb │ ├── component.rb │ ├── definition.rb │ ├── dependency.rb │ ├── deploy.rb │ ├── deploy │ ├── bootstrap.rb │ ├── bootstrap_context.rb │ ├── bootstrap_registry.rb │ ├── client.rb │ ├── json_definition.rb │ └── s3_uploader.rb │ ├── graph_builder.rb │ ├── live_pipeline.rb │ ├── options.rb │ ├── pipeline_date_time │ ├── pipeline_date.rb │ ├── pipeline_date_pattern.rb │ ├── pipeline_date_range_base.rb │ ├── pipeline_day_range.rb │ ├── pipeline_month_range.rb │ └── pipeline_year_range.rb │ ├── reference_list.rb │ ├── runs_report.rb │ ├── s3_writer.rb │ ├── shared_examples.rb │ ├── tasks.rb │ ├── tasks │ ├── definition.rb │ ├── deploy.rb │ ├── graph.rb │ ├── upload_pipeline_as_gem.rb │ └── upload_steps.rb │ └── version.rb ├── pipely.gemspec └── spec ├── fixtures ├── bootstrap_contexts │ ├── green.rb │ └── simple.rb ├── templates │ └── bootstrap.sh.erb └── vcr_cassettes │ └── build_and_upload_gems.yml ├── lib ├── pipely │ ├── build │ │ ├── daily_scheduler_spec.rb │ │ ├── environment_config_spec.rb │ │ ├── right_now_scheduler_spec.rb │ │ ├── s3_path_builder_spec.rb │ │ └── template_spec.rb │ ├── build_spec.rb │ ├── bundler │ │ ├── bundle_spec.rb │ │ ├── gem_packager_spec.rb │ │ └── project_gem_spec.rb │ ├── component_spec.rb │ ├── definition_spec.rb │ ├── dependency_spec.rb │ ├── deploy │ │ ├── bootstrap_context_spec.rb │ │ ├── bootstrap_registry_spec.rb │ │ ├── bootstrap_spec.rb │ │ ├── client_spec.rb │ │ └── s3_uploader_spec.rb │ ├── graph_builder_spec.rb │ ├── pipeline_date_time │ │ ├── pipeline_date_pattern_spec.rb │ │ ├── pipeline_date_range_base_spec.rb │ │ ├── pipeline_date_spec.rb │ │ ├── pipeline_day_range_spec.rb │ │ ├── pipeline_month_range_spec.rb │ │ └── pipeline_year_range_spec.rb │ ├── reference_list_spec.rb │ └── tasks │ │ └── upload_pipeline_as_gem_spec.rb └── pipely_spec.rb └── spec_helper.rb /.cane: -------------------------------------------------------------------------------- 1 | --style-exclude spec/lib/pipely/build/template_spec.rb 2 | --style-exclude spec/lib/pipely/build/s3_path_builder_spec.rb 3 | --abc-exclude Pipely::Build::TemplateHelpers#streaming_hadoop_step,Pipely::Options.parse 4 | --doc-exclude lib/pipely/tasks/*.rb 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.swp 3 | *.gem 4 | Gemfile.lock 5 | graphs 6 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --colour 2 | --profile 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | rvm: 3 | - 1.9.3 4 | - 2.0.0 5 | script: bundle exec rake 6 | 
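The Travis build above runs `bundle exec rake`, which is the default task defined in the Rakefile below (the spec suite plus the Cane quality checks), so the full CI check can be reproduced locally with:

    bundle exec rake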
-------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "http://rubygems.org" 2 | 3 | gemspec 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pipely 2 | ====== 3 | [![Gem Version](https://badge.fury.io/rb/pipely.png)](http://badge.fury.io/rb/pipely) [![Build Status](https://travis-ci.org/swipely/pipely.png?branch=master)](https://travis-ci.org/swipely/pipely) [![Code Climate](https://codeclimate.com/repos/524b941156b1025b6c08a96a/badges/c0ad2bbec610f1d0f0f7/gpa.png)](https://codeclimate.com/repos/524b941156b1025b6c08a96a/feed) 4 | 5 | Build, deploy, and visualize pipeline definitions for AWS Data Pipeline 6 | 7 | "AWS Data Pipeline is a web service that you can use to automate the movement and transformation of data. With AWS Data Pipeline, you can define data-driven workflows, so that tasks can be dependent on the successful completion of previous tasks." 8 | 9 | http://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/what-is-datapipeline.html 10 | 11 | 12 | ## Install 13 | 14 | (First install [GraphViz](http://www.graphviz.org) if it is not already installed.) 15 | 16 | Into Gemfile from rubygems.org: 17 | 18 | gem 'pipely' 19 | 20 | Into environment gems from rubygems.org: 21 | 22 | gem install pipely 23 | 24 | 25 | ## Usage 26 | 27 | ### Rake Tasks 28 | 29 | Pipely provides the following Rake tasks (defined under `lib/pipely/tasks`): 30 | 31 | rake definition # Graphs the full pipeline definition using Graphviz 32 | rake deploy # Deploy pipeline 33 | rake graph # Graphs the full pipeline definition using Graphviz 34 | rake upload_steps # Upload Data Pipeline steps to S3 35 | 36 | In order to run these tasks, you must have an aws-sdk credentials file. 37 | This can be created by running `aws configure`, as described [in the aws-sdk docs](http://docs.aws.amazon.com/AWSSdkDocsRuby/latest/DeveloperGuide/ruby-dg-setup.html#set-up-creds). 38 | 39 | ### Command-line Interface 40 | 41 | (If you used the Gemfile install, prefix the below commands with `bundle exec`.)
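A small `definition.json` to try against might look like this (the component ids here are made up; the format is the `{ "objects": [...] }` JSON that `Pipely::Definition.parse` expects, with `{ "ref": ... }` hashes for references):

    {
      "objects": [
        { "id": "DefaultSchedule", "type": "Schedule" },
        { "id": "ExtractActivity", "type": "ShellCommandActivity",
          "schedule": { "ref": "DefaultSchedule" } },
        { "id": "TransformActivity", "type": "ShellCommandActivity",
          "schedule": { "ref": "DefaultSchedule" },
          "dependsOn": { "ref": "ExtractActivity" } }
      ]
    }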
42 | 43 | To render a JSON pipeline definition as a PNG graph visualization: 44 | 45 | pipely definition.json 46 | 47 | To specify the output path for PNG files: 48 | 49 | pipely -o path/to/graph/pngs definition.json 50 | 51 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rake 2 | begin 3 | require 'bundler/setup' 4 | rescue LoadError 5 | puts 'You must `gem install bundler` and `bundle install` to run rake tasks' 6 | end 7 | 8 | Bundler::GemHelper.install_tasks 9 | 10 | require 'rspec/core/rake_task' 11 | require 'cane/rake_task' 12 | 13 | RSpec::Core::RakeTask.new do |t| 14 | t.pattern = 'spec/**/*_spec.rb' 15 | end 16 | 17 | Cane::RakeTask.new(:quality) do |cane| 18 | cane.canefile = '.cane' 19 | end 20 | 21 | task :default => [:spec, :quality] 22 | -------------------------------------------------------------------------------- /bin/pipely: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'pipely' 4 | 5 | options = Pipely::Options.parse 6 | 7 | if options.pipeline_id 8 | Pipely::Actions::GraphLivePipeline.new(options).execute 9 | 10 | elsif options.input_path 11 | Pipely::Actions::GraphFilePipeline.new(options).execute 12 | 13 | else 14 | Pipely::Actions::ListLivePipelines.new(options).execute 15 | 16 | end 17 | -------------------------------------------------------------------------------- /lib/pipely.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/options' 2 | require 'pipely/actions' 3 | require 'pipely/definition' 4 | require 'pipely/graph_builder' 5 | require 'pipely/live_pipeline' 6 | require 'pipely/s3_writer' 7 | 8 | # The top-level module for this gem. It provides the recommended public 9 | # interface for using Pipely to visualize and manipulate your Data Pipeline 10 | # definitions.
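# Example (file names are hypothetical):
#
#   Pipely.draw(File.read('definition.json'), 'pipeline.png')
#   Pipely.draw(File.read('definition.json'), 's3://my-bucket/graphs/pipeline.png')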
11 | # 12 | module Pipely 13 | 14 | ENV['AWS_REGION'] ||= 'us-east-1' 15 | 16 | def self.draw(definition_json, filename, component_attributes=nil) 17 | definition = Definition.parse(definition_json) 18 | 19 | if component_attributes 20 | definition.apply_component_attributes(component_attributes) 21 | end 22 | 23 | graph_builder = GraphBuilder.new 24 | 25 | graph = graph_builder.build(definition.components_for_graph) 26 | 27 | if filename.start_with?('s3://') 28 | content = graph.output( :png => String ) 29 | S3Writer.new(filename).write(content) 30 | else 31 | graph.output( :png => filename ) 32 | filename 33 | end 34 | end 35 | 36 | end 37 | -------------------------------------------------------------------------------- /lib/pipely/actions.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/actions/graph_live_pipeline' 2 | require 'pipely/actions/graph_file_pipeline' 3 | require 'pipely/actions/list_live_pipelines' 4 | 5 | module Pipely 6 | module Actions 7 | end 8 | end 9 | -------------------------------------------------------------------------------- /lib/pipely/actions/graph_file_pipeline.rb: -------------------------------------------------------------------------------- 1 | module Pipely 2 | module Actions 3 | 4 | # Graph a pipeline definition from a file. 5 | # 6 | class GraphFilePipeline 7 | 8 | def initialize(options) 9 | @options = options 10 | end 11 | 12 | def execute 13 | puts "Generating #{output_file}" 14 | Pipely.draw(definition_json, output_file) 15 | end 16 | 17 | private 18 | 19 | def definition_json 20 | File.read(@options.input_path) 21 | end 22 | 23 | def output_base 24 | @output_base ||= File.basename(@options.input_path,".*") + '.png' 25 | end 26 | 27 | def output_file 28 | @output_file ||= if @options.output_path 29 | File.join(@options.output_path, output_base) 30 | else 31 | output_base 32 | end 33 | end 34 | 35 | end 36 | 37 | end 38 | end 39 | 40 | -------------------------------------------------------------------------------- /lib/pipely/actions/graph_live_pipeline.rb: -------------------------------------------------------------------------------- 1 | module Pipely 2 | module Actions 3 | 4 | # Graph a deployed pipeline with live execution statuses. 5 | # 6 | class GraphLivePipeline 7 | 8 | def initialize(options) 9 | @options = options 10 | end 11 | 12 | def execute 13 | live_pipeline = Pipely::LivePipeline.new(@options.pipeline_id) 14 | live_pipeline.print_runs_report 15 | 16 | outfile = if @options.latest_run 17 | live_pipeline.render_latest_graph(@options.output_path) 18 | else 19 | live_pipeline.render_graphs(@options.output_path) 20 | end 21 | 22 | if @options.json_output 23 | $stdout.puts({ :graph => outfile }.to_json) 24 | elsif $stdout.tty?
25 | $stdout.puts "Generated #{outfile}" 26 | else 27 | $stdout.puts outfile 28 | end 29 | end 30 | 31 | end 32 | 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /lib/pipely/actions/list_live_pipelines.rb: -------------------------------------------------------------------------------- 1 | module Pipely 2 | module Actions 3 | 4 | # List currently deployed pipelines 5 | # 6 | class ListLivePipelines 7 | 8 | def initialize(options) 9 | @options = options 10 | end 11 | 12 | def execute 13 | if @options.json_output 14 | $stdout.puts pipeline_ids.to_json 15 | else 16 | $stdout.puts pipeline_ids.map { |pipeline| 17 | [ pipeline.name, pipeline.id ].join("\t") 18 | } 19 | end 20 | end 21 | 22 | private 23 | 24 | def pipeline_ids 25 | ids = [] 26 | 27 | data_pipeline = Aws::DataPipeline::Client.new 28 | 29 | 30 | marker = nil 31 | begin 32 | result = data_pipeline.list_pipelines( 33 | marker: marker, 34 | ) 35 | ids += result.pipeline_id_list 36 | marker = result.marker 37 | end while (result.has_more_results && marker) 38 | 39 | ids 40 | end 41 | end 42 | 43 | end 44 | end 45 | -------------------------------------------------------------------------------- /lib/pipely/build.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/build/definition' 2 | require 'pipely/build/template' 3 | require 'pipely/build/daily_scheduler' 4 | require 'pipely/build/hourly_scheduler' 5 | require 'pipely/build/right_now_scheduler' 6 | require 'pipely/build/s3_path_builder' 7 | require 'pipely/build/environment_config' 8 | require 'pathology' 9 | 10 | module Pipely 11 | 12 | # Turn Templates+config into a deployable Definition. 13 | # 14 | module Build 15 | 16 | def self.build_definition(template, environment, config_path) 17 | env = environment.to_sym 18 | config = EnvironmentConfig.load(config_path, env) 19 | 20 | Definition.new(template, env, config) 21 | end 22 | 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /lib/pipely/build/daily_scheduler.rb: -------------------------------------------------------------------------------- 1 | module Pipely 2 | module Build 3 | 4 | # Compute schedule attributes for a pipeline that runs once-a-day at a set 5 | # time. 6 | # 7 | class DailyScheduler 8 | 9 | def initialize(start_time) 10 | @start_time = DateTime.parse(start_time).strftime('%H:%M:%S') 11 | end 12 | 13 | def period 14 | '24 hours' 15 | end 16 | 17 | def start_date_time 18 | date = Date.today 19 | 20 | # if start_time already happened today, wait for tomorrow's start_time 21 | now_time = Time.now.utc.strftime('%H:%M:%S') 22 | date += 1 if now_time >= @start_time 23 | 24 | date.strftime("%Y-%m-%dT#{@start_time}") 25 | end 26 | 27 | def to_hash 28 | { 29 | :period => period, 30 | :start_date_time => start_date_time 31 | } 32 | end 33 | 34 | end 35 | 36 | end 37 | end 38 | -------------------------------------------------------------------------------- /lib/pipely/build/definition.rb: -------------------------------------------------------------------------------- 1 | module Pipely 2 | module Build 3 | 4 | # Represent a pipeline definition, built from a Template and some config. 
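#
# Example (template and config paths are hypothetical):
#
#   source = File.read('templates/pipeline.json.erb')
#   template = Pipely::Build::Template.new(source)
#   definition = Pipely::Build.build_definition(template, 'staging', 'config/pipeline.yml')
#   definition.to_json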
5 | # 6 | class Definition < Struct.new(:template, :env, :config) 7 | extend Forwardable 8 | 9 | def_delegators :template, :pipeline_id=, :pipeline_id 10 | 11 | def pipeline_name 12 | config[:name] 13 | end 14 | 15 | def base_filename 16 | config[:namespace] 17 | end 18 | 19 | def s3_prefix 20 | if config[:s3_prefix] 21 | template = Pathology.template(config[:s3_prefix]) 22 | template.interpolate(interpolation_context) 23 | else 24 | fail('unspecified s3_prefix') 25 | end 26 | end 27 | 28 | def s3_path_builder 29 | S3PathBuilder.new(config[:s3].merge(prefix: s3_prefix)) 30 | end 31 | 32 | def to_json 33 | template.apply_config(:environment => env) 34 | template.apply_config(config) 35 | template.apply_config(s3_path_builder.to_hash) 36 | template.apply_config(scheduler.to_hash) 37 | 38 | template.to_json 39 | end 40 | 41 | def scheduler 42 | case config[:scheduler] 43 | when 'daily' 44 | DailyScheduler.new(config[:start_time]) 45 | when 'now' 46 | RightNowScheduler.new 47 | when 'hourly' 48 | HourlyScheduler.new 49 | else 50 | fail('unspecified scheduler') 51 | end 52 | end 53 | 54 | private 55 | 56 | def interpolation_context 57 | config.merge({ 58 | :whoami => `whoami`.strip, 59 | }) 60 | end 61 | 62 | end 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /lib/pipely/build/environment_config.rb: -------------------------------------------------------------------------------- 1 | require 'yaml' 2 | 3 | module Pipely 4 | module Build 5 | 6 | # Work with YAML config files that contain parallel configs for various 7 | # environments. 8 | # 9 | class EnvironmentConfig < Hash 10 | 11 | # Continue supporting env-based defaults until pipely v1.0 12 | ENV_DEFAULTS = { 13 | production: { 14 | s3_prefix: 'production/:namespace', 15 | scheduler: 'daily', 16 | start_time: '11:00:00', 17 | }, 18 | staging: { 19 | s3_prefix: 'staging/:whoami/:namespace', 20 | scheduler: 'now', 21 | 22 | # Since scheduler can now be overridden via command-line argument, 23 | # supply a start_time even for environments that default to 'now'. 24 | start_time: '11:00:00', 25 | } 26 | } 27 | 28 | def self.load(filename, environment) 29 | raw = YAML.load_file(filename)[environment.to_s] 30 | config = load_from_hash(raw) 31 | 32 | if defaults = ENV_DEFAULTS[environment.to_sym] 33 | defaults.merge(config) 34 | else 35 | config 36 | end 37 | end 38 | 39 | def self.load_from_hash(attributes) 40 | config = new 41 | 42 | attributes.each do |k, v| 43 | case v 44 | when Hash 45 | config[k.to_sym] = load_from_hash(v) 46 | else 47 | config[k.to_sym] = v.clone 48 | end 49 | end 50 | 51 | config 52 | end 53 | 54 | end 55 | 56 | end 57 | end 58 | -------------------------------------------------------------------------------- /lib/pipely/build/hourly_scheduler.rb: -------------------------------------------------------------------------------- 1 | module Pipely 2 | module Build 3 | 4 | # Compute schedule attributes for a pipeline that runs once an hour, on 5 | # the hour.
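#
# For example, a pipeline deployed at 2014-06-01T10:15:00 UTC would yield:
#
#   period          # => '1 hours'
#   start_date_time # => '2014-06-01T11:00:00'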
6 | # 7 | class HourlyScheduler 8 | 9 | def period 10 | '1 hours' 11 | end 12 | 13 | def start_date_time 14 | 15 | (Time.now.utc + 3600).strftime("%Y-%m-%dT%H:00:00") 16 | 17 | end 18 | 19 | def to_hash 20 | { 21 | :period => period, 22 | :start_date_time => start_date_time 23 | } 24 | end 25 | 26 | end 27 | 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /lib/pipely/build/right_now_scheduler.rb: -------------------------------------------------------------------------------- 1 | module Pipely 2 | module Build 3 | 4 | # Compute schedule attributes for a pipeline that should run immediately 5 | # after being deployed. 6 | # 7 | class RightNowScheduler 8 | 9 | def period 10 | # DataPipeline is soon releasing a run-once feature. 11 | # TODO: Switch to that when available. 12 | '1 year' 13 | end 14 | 15 | def start_date_time 16 | Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S") 17 | end 18 | 19 | def to_hash 20 | { 21 | :period => period, 22 | :start_date_time => start_date_time 23 | } 24 | end 25 | 26 | end 27 | 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /lib/pipely/build/s3_path_builder.rb: -------------------------------------------------------------------------------- 1 | require 'pathology' 2 | 3 | module Pipely 4 | module Build 5 | 6 | # Builds paths to assets, logs, and steps that are on S3. 7 | # 8 | class S3PathBuilder 9 | 10 | START_TIME = "\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}" 11 | START_DATE = "\#{format(@scheduledStartTime,'YYYY-MM-dd')}" 12 | 13 | # options[:templates] should contain a Hash of your desired S3 path 14 | # patterns, formatted for Pathology. The remainder of the options Hash 15 | # serves as interpolation values for the templates. 16 | # 17 | # Several additional interpolation variables (:protocol, :timestamp, 18 | # :datestamp) are provided by S3PathBuilder at interpolation time. 19 | # 20 | # If options[:templates] is not present, or if it is missing any of the 21 | # legacy templates (assets, logs, steps, etc.), they will be 22 | # automatically built, using bucket names found in the options Hash, 23 | # preserving the original behavior. 24 | # 25 | def initialize(options) 26 | @options = options.merge({ 27 | timestamp: START_TIME, 28 | datestamp: START_DATE, 29 | }) 30 | 31 | @path_templates = default_templates 32 | 33 | if templates = @options.delete(:templates) 34 | @path_templates.merge!(templates) 35 | end 36 | end 37 | 38 | # Support legacy interface, wherein config simply contained bucket names, 39 | # and users were forced to abide by Pipely's somewhat arbitrary path 40 | # structure. 41 | # 42 | def default_templates 43 | assets, logs, steps = @options.values_at(:assets, :logs, :steps) 44 | 45 | { 46 | asset: ":protocol://#{assets}/:prefix/:timestamp", 47 | log: ":protocol://#{logs}/:prefix/:timestamp", 48 | step: ":protocol://#{steps}/:prefix", 49 | shared_asset: ":protocol://#{assets}/:prefix/shared/:datestamp", 50 | bucket_relative_asset: ':prefix/:timestamp', 51 | } 52 | end 53 | 54 | # Implement path interpolation methods, e.g. s3_log_prefix, etc. 
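#
# Example (bucket names are hypothetical):
#
#   builder = S3PathBuilder.new(assets: 'my-assets', logs: 'my-logs',
#                               steps: 'my-steps', prefix: 'production/my_pipeline')
#
#   builder.s3_step_prefix
#   # => "s3://my-steps/production/my_pipeline"
#
#   builder.s3n_log_prefix
#   # => "s3n://my-logs/production/my_pipeline/" followed by the runtime
#   #    format(@scheduledStartTime,...) timestamp expression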
55 | # 56 | def method_missing(method_name, *args, &block) 57 | case method_name 58 | when /^(s3n?)_(.*)_prefix$/ 59 | if pattern = @path_templates[$2.to_sym] 60 | Pathology.template(pattern).interpolate( 61 | @options.merge({protocol: $1}) 62 | ) 63 | else 64 | super 65 | end 66 | else 67 | super 68 | end 69 | end 70 | 71 | # Re-route legacy method name to the standard format implemented by 72 | # method_missing above. 73 | # 74 | def bucket_relative_s3_asset_prefix 75 | s3_bucket_relative_asset_prefix 76 | end 77 | 78 | def to_hash 79 | values = %w(s3 s3n).flat_map do |protocol| 80 | @path_templates.keys.map do |path_name| 81 | key = "#{protocol}_#{path_name}_prefix".to_sym 82 | [key, send(key)] 83 | end 84 | end 85 | 86 | # Support legacy method name. 87 | Hash[values].merge({ 88 | bucket_relative_s3_asset_prefix: bucket_relative_s3_asset_prefix 89 | }) 90 | end 91 | 92 | end 93 | 94 | end 95 | end 96 | -------------------------------------------------------------------------------- /lib/pipely/build/template.rb: -------------------------------------------------------------------------------- 1 | require 'active_support/core_ext/hash' 2 | require 'erb' 3 | 4 | require 'pipely/build/template_helpers' 5 | 6 | module Pipely 7 | module Build 8 | 9 | # An ERB template that can be interpolated with config hashes to render a 10 | # deployable pipeline definition. 11 | # 12 | class Template 13 | include TemplateHelpers 14 | 15 | attr_accessor :pipeline_id 16 | 17 | def initialize(source) 18 | @source = source 19 | @config = {} 20 | end 21 | 22 | def apply_config(attributes) 23 | @config.merge!(attributes.symbolize_keys) 24 | end 25 | 26 | def to_json 27 | ERB.new(@source).result(binding) 28 | end 29 | 30 | def respond_to_missing?(method_name, include_private=false) 31 | @config.key?(method_name) || super 32 | end 33 | 34 | def method_missing(method_name, *args, &block) 35 | if @config.keys.include?(method_name) 36 | @config[method_name] 37 | else 38 | super 39 | end 40 | end 41 | 42 | end 43 | 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /lib/pipely/build/template_helpers.rb: -------------------------------------------------------------------------------- 1 | module Pipely 2 | module Build 3 | 4 | # Helper methods used by ERB templates.
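#
# For example, inside a template (the paths are hypothetical):
#
#   <%= streaming_hadoop_step(
#         input:  '/input/',
#         output: '/output/',
#         mapper: '/mapper.rb',
#         reducer: '/reducer.rb'
#       ) %>
#
# renders a comma-separated EMR step string starting with
# '/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,...'.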
5 | # 6 | module TemplateHelpers 7 | 8 | def s3_asset_path(path) 9 | "#{s3_asset_prefix if '/' == path[0]}#{path}" 10 | end 11 | 12 | def s3n_asset_path(path) 13 | "#{s3n_asset_prefix if '/' == path[0]}#{path}" 14 | end 15 | 16 | def s3n_step_path(path) 17 | "#{s3n_step_prefix if '/' == path[0]}#{path}" 18 | end 19 | 20 | def streaming_hadoop_step(options) 21 | parts = [ '/home/hadoop/contrib/streaming/hadoop-streaming.jar' ] 22 | 23 | if jars = options[:lib_jars] 24 | parts += Array(jars).map { |jar| ['-libjars', "#{jar}"] }.flatten 25 | end 26 | 27 | (options[:defs] || {}).each do |name, value| 28 | parts += ['-D', "#{name}=#{value}".gsub(',', "\\,")] 29 | end 30 | 31 | Array(options[:input]).each do |input| 32 | parts += [ '-input', s3n_asset_path(input) ] 33 | end 34 | 35 | Array(options[:output]).each do |output| 36 | parts += ['-output', s3_asset_path(output) ] 37 | end 38 | 39 | if options[:outputformat] 40 | parts += ['-outputformat', options[:outputformat] ] 41 | end 42 | 43 | if options[:partitioner] 44 | parts += ['-partitioner', options[:partitioner] ] 45 | end 46 | 47 | Array(options[:mapper]).each do |mapper| 48 | parts += ['-mapper', s3n_step_path(mapper) ] 49 | end 50 | 51 | Array(options[:reducer]).each do |reducer| 52 | parts += ['-reducer', s3n_step_path(reducer) ] 53 | end 54 | 55 | Array(options[:cache_file]).each do |cache_file| 56 | parts += ['-cacheFile', s3n_asset_path(cache_file)] 57 | end 58 | 59 | (options[:env] || {}).each do |name, value| 60 | parts += ['-cmdenv', "#{name}=#{value}"] 61 | end 62 | 63 | parts.join(',') 64 | end 65 | 66 | end 67 | 68 | end 69 | end 70 | -------------------------------------------------------------------------------- /lib/pipely/bundler.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/bundler/bundle' 2 | require 'pipely/bundler/gem_packager' 3 | require 'pipely/bundler/project_gem' 4 | 5 | module Pipely 6 | 7 | # 8 | # Module for packaging up a gem project and its dependencies, as they exist 9 | # on your machine, for deployment. 10 | # 11 | # None of this code is specific to AWS Data Pipelines, and it could be used 12 | # anywhere else you want to deploy an in-development gem with frozen dependencies. 13 | # 14 | module Bundler 15 | 16 | # List all the gems used in this project in the format: 17 | # 18 | # { name => path_to_cache_file } 19 | # 20 | # For gems that are git- or path-sourced, it will first build a fresh cache 21 | # file for the gem. 22 | # 23 | def self.gem_files(vendor_dir='vendor/pipeline') 24 | ProjectGem.load(vendor_dir).gem_files 25 | end 26 | 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /lib/pipely/bundler/bundle.rb: -------------------------------------------------------------------------------- 1 | require 'fileutils' 2 | 3 | module Pipely 4 | module Bundler 5 | 6 | # 7 | # Provides access to a bundle's list of gems 8 | # 9 | class Bundle 10 | 11 | attr_reader :spec_set 12 | 13 | SOURCE_TYPES = %w[Bundler::Source::Git Bundler::Source::Path] 14 | 15 | def self.build(vendor_dir, 16 | groups=[:default], 17 | definition=::Bundler.definition) 18 | new( 19 | vendor_dir, 20 | definition.specs_for(groups), 21 | definition.instance_variable_get(:@locked_sources) 22 | ) 23 | end 24 | 25 | def initialize(vendor_dir, spec_set, locked_sources) 26 | @spec_set = spec_set 27 | @locked_sources = locked_sources 28 | @vendor_dir = vendor_dir 29 | unless Dir.exists?
@vendor_dir 30 | FileUtils.mkdir_p(@vendor_dir) 31 | end 32 | 33 | end 34 | 35 | def gem_files(opts = {}) 36 | gem_packager = opts[:gem_packager] || GemPackager.new(@vendor_dir) 37 | gems_to_exclude = opts[:gems_to_exclude] || [] 38 | 39 | gem_files = {} 40 | 41 | excluded_gems = lambda { |s| gems_to_exclude.include? s.name } 42 | merge_gem = lambda { |s| gem_files.merge!(gem_file(s, gem_packager)) } 43 | 44 | @spec_set.to_a.reject(&excluded_gems).each(&merge_gem) 45 | 46 | gem_files 47 | end 48 | 49 | private 50 | 51 | def gem_file(spec, gem_packager) 52 | if source = locked_sources_by_name[spec.name] 53 | gem_packager.build_from_source(source.name, source.path) 54 | else 55 | gem_packager.package(spec) 56 | end 57 | end 58 | 59 | def locked_sources_by_name 60 | return @locked_sources_by_name if @locked_sources_by_name 61 | 62 | @locked_sources_by_name = {} 63 | 64 | @locked_sources.each do |source| 65 | # Only include git or path sources. 66 | if SOURCE_TYPES.include?(source.class.name) 67 | @locked_sources_by_name[source.name] = source 68 | end 69 | end 70 | 71 | locked_sources_by_name 72 | end 73 | 74 | end 75 | 76 | end 77 | end 78 | -------------------------------------------------------------------------------- /lib/pipely/bundler/gem_packager.rb: -------------------------------------------------------------------------------- 1 | require 'fileutils' 2 | require 'rubygems/remote_fetcher' 3 | 4 | module Pipely 5 | module Bundler 6 | 7 | # 8 | # Builds cache files for git- or path-sourced gems. 9 | # 10 | class GemPackager 11 | 12 | # Alert upon gem-building failures 13 | class GemBuildError < RuntimeError ; end 14 | 15 | # Alert upon gem-fetching failures 16 | class GemFetchError < RuntimeError ; end 17 | 18 | def initialize(vendor_dir) 19 | @vendor_dir = vendor_dir 20 | unless Dir.exists? @vendor_dir 21 | FileUtils.mkdir_p(@vendor_dir) 22 | end 23 | end 24 | 25 | def package(spec) 26 | if vendored_gem = vendor_local_gem(spec) 27 | vendored_gem 28 | 29 | # Finally, some gems do not exist in the cache or as source. For 30 | # instance, json is shipped with the ruby dist. Try to fetch directly 31 | # from rubygems. 32 | else 33 | gem_file_name = "#{spec.name}-#{spec.version}.gem" 34 | { spec.name => download_from_rubygems(gem_file_name)} 35 | end 36 | end 37 | 38 | def vendor_local_gem(spec) 39 | gem_file = spec.cache_file 40 | vendored_gem = File.join( @vendor_dir, File.basename(gem_file) ) 41 | 42 | if File.exists?(vendored_gem) 43 | { spec.name => vendored_gem } 44 | 45 | # Gem exists in the local ruby gems cache 46 | elsif File.exists? 
gem_file 47 | 48 | # Copy to vendor dir 49 | FileUtils.cp(gem_file, vendored_gem) 50 | 51 | { spec.name => vendored_gem } 52 | 53 | # If source exists, build a gem from it 54 | elsif File.directory?(spec.gem_dir) 55 | build_from_source(spec.name, spec.gem_dir) 56 | else 57 | nil 58 | end 59 | end 60 | 61 | def build_from_source(spec_name, source_path) 62 | gem_spec_path = "#{spec_name}.gemspec" 63 | 64 | # Build the gemspec 65 | gem_spec = Gem::Specification::load( 66 | File.join(source_path,gem_spec_path)) 67 | 68 | gem_file = build_gem(spec_name, source_path) 69 | 70 | # Move to vendor dir 71 | FileUtils.mv( 72 | File.join(source_path,gem_file), 73 | File.join(@vendor_dir,gem_file)) 74 | 75 | { gem_spec.name => File.join(@vendor_dir, gem_file) } 76 | end 77 | 78 | def build_gem(spec_name, source_path) 79 | gem_spec_path = "#{spec_name}.gemspec" 80 | 81 | Dir.chdir(source_path) do 82 | result = `gem build #{gem_spec_path} 2>&1` 83 | 84 | if result =~ /ERROR/i 85 | raise GemBuildError.new( 86 | "Failed to build #{gem_spec_path} \n" << result) 87 | else 88 | result.scan( 89 | /File:(.+.gem)$/).flatten.first.strip 90 | end 91 | end 92 | end 93 | 94 | def download_from_rubygems(gem_file_name) 95 | vendored_gem = File.join( @vendor_dir, gem_file_name ) 96 | 97 | # XXX: add a wiki link detailing what is going on here 98 | puts "Fetching gem #{gem_file_name} directly from rubygems; most " + 99 | "likely this gem was packaged along with your ruby " + 100 | "distribution. For more details see LINK" 101 | 102 | ruby_gem_url = "https://rubygems.org/downloads/#{gem_file_name}" 103 | 104 | fetcher = Gem::RemoteFetcher.new 105 | gem_data = fetcher.fetch_path(ruby_gem_url) 106 | IO.binwrite(vendored_gem, gem_data) 107 | 108 | vendored_gem 109 | end 110 | end 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /lib/pipely/bundler/project_gem.rb: -------------------------------------------------------------------------------- 1 | module Pipely 2 | module Bundler 3 | 4 | # 5 | # Builds the project's gem from gemspec and pulls in its dependencies via 6 | # the gem's bundle. 7 | # 8 | class ProjectGem 9 | 10 | attr_reader :project_spec 11 | 12 | def self.load(vendor_dir) 13 | if gem_spec = Dir.glob("*.gemspec").first 14 | # project gem spec 15 | new(Gem::Specification::load(gem_spec), vendor_dir) 16 | else 17 | raise "Failed to find gemspec" 18 | end 19 | end 20 | 21 | def initialize(project_spec, vendor_dir) 22 | @project_spec = project_spec 23 | @vendor_dir = vendor_dir 24 | unless Dir.exists?
@vendor_dir 25 | FileUtils.mkdir_p(@vendor_dir) 26 | end 27 | end 28 | 29 | def gem_files 30 | # Project gem should be at the bottom of the dep list 31 | @gem_files ||= dependency_gem_files.merge(project_gem_file) 32 | end 33 | 34 | def dependency_gem_files(bundle=Bundle.build(@vendor_dir)) 35 | # Always exclude bundler and the project gem 36 | gems_to_exclude = [ @project_spec.name, 'bundler' ] 37 | 38 | bundle.gem_files(gems_to_exclude: gems_to_exclude) 39 | end 40 | 41 | def project_gem_file(gem_packager=GemPackager.new(@vendor_dir)) 42 | gem_packager.build_from_source(@project_spec.name, Dir.pwd) 43 | end 44 | 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /lib/pipely/component.rb: -------------------------------------------------------------------------------- 1 | require 'virtus' 2 | require 'pipely/reference_list' 3 | 4 | module Pipely 5 | 6 | # Represents a Component within a Data Pipeline Definition 7 | # http://amzn.to/16lbBKx 8 | # 9 | class Component 10 | 11 | REFERENCE_KEYS = [ 12 | 'dependsOn', 13 | 'input', 14 | 'output', 15 | 'runsOn', 16 | 'schedule', 17 | 'onFail', 18 | 'onSuccess', 19 | 'dataFormat', 20 | 'precondition', 21 | ] 22 | 23 | STATE_COLORS = { 24 | 'FINISHED' => 'deepskyblue1', 25 | 'RUNNING' => 'chartreuse', 26 | 'WAITING_ON_DEPENDENCIES' => 'gray', 27 | 'WAITING_FOR_RUNNER' => 'bisque4', 28 | 'FAILED' => 'orangered', 29 | } 30 | 31 | include Virtus.model 32 | 33 | attribute :id, String 34 | attribute :type, String 35 | attribute :color, String 36 | attribute :execution_state, String 37 | 38 | attribute :dependsOn, ReferenceList 39 | attribute :input, ReferenceList 40 | attribute :output, ReferenceList 41 | attribute :runsOn, ReferenceList 42 | attribute :schedule, ReferenceList 43 | attribute :onFail, ReferenceList 44 | attribute :onSuccess, ReferenceList 45 | attribute :dataFormat, ReferenceList 46 | attribute :precondition, ReferenceList 47 | 48 | def initialize(args) 49 | @original_args = args.clone 50 | super 51 | coerce_references 52 | end 53 | 54 | def coerce_references 55 | REFERENCE_KEYS.each do |key| 56 | value = send(key) 57 | unless value.is_a?(ReferenceList) 58 | send("#{key}=", ReferenceList.new(value)) 59 | end 60 | end 61 | end 62 | 63 | def graphviz_options 64 | { 65 | :shape => 'record', 66 | :label => "{#{label}}", 67 | :color => color || 'black', 68 | :fillcolor => STATE_COLORS[execution_state] || 'white', 69 | :style => 'filled', 70 | } 71 | end 72 | 73 | def dependencies(scope=nil) 74 | deps = dependsOn.build_dependencies('dependsOn') + 75 | precondition.build_dependencies('precondition') + 76 | input.build_dependencies('input') + 77 | output.build_dependencies('output') 78 | 79 | if :all == scope 80 | deps += runsOn.build_dependencies(:runsOn) 81 | deps += schedule.build_dependencies(:schedule) 82 | deps += onFail.build_dependencies(:onFail) 83 | deps += onSuccess.build_dependencies(:onSuccess) 84 | deps += dataFormat.build_dependencies(:dataFormat) 85 | end 86 | 87 | deps 88 | end 89 | 90 | def to_json(options={}, depth=0) 91 | h = @original_args 92 | 93 | REFERENCE_KEYS.each do |key| 94 | value = send(key) 95 | 96 | if value.present? 
97 | h[key] = value 98 | else 99 | h.delete(key) 100 | end 101 | end 102 | 103 | h.to_json(options) 104 | end 105 | 106 | private 107 | 108 | def label 109 | [id, type, execution_state].compact.join('|') 110 | end 111 | 112 | end 113 | 114 | end 115 | -------------------------------------------------------------------------------- /lib/pipely/definition.rb: -------------------------------------------------------------------------------- 1 | require 'json' 2 | require 'pipely/component' 3 | require 'set' 4 | module Pipely 5 | 6 | # Pipely's representation of a Pipeline Definition for AWS Data Pipeline 7 | # http://amzn.to/1bpW8Ru 8 | # 9 | class Definition 10 | 11 | # Showing all component types leads to an unwieldy graph. 12 | # TODO: make this list configurable. 13 | NON_GRAPH_COMPONENT_TYPES = [ 14 | 'Schedule', 15 | 'SnsAlarm', 16 | 'Ec2Resource', 17 | 'EmrCluster', 18 | 'CSV', 19 | nil, 20 | ] 21 | 22 | def self.parse(content) 23 | objects = JSON.parse(content)['objects'] 24 | components = objects.map{|obj| Component.new(obj)} 25 | 26 | new(components) 27 | end 28 | 29 | def initialize(components) 30 | @components = components 31 | end 32 | 33 | attr_reader :components 34 | 35 | def components_for_graph 36 | components.reject { |component| 37 | NON_GRAPH_COMPONENT_TYPES.include?(component['type']) 38 | } 39 | end 40 | 41 | def to_json 42 | { :objects => components }.to_json 43 | end 44 | 45 | def apply_component_attributes(component_attributes) 46 | self.components.each do |component| 47 | if attributes = component_attributes[component.id] 48 | component.attributes = attributes 49 | end 50 | end 51 | end 52 | 53 | private 54 | 55 | def get_components(target_component_ids) 56 | components.select { |component| 57 | target_component_ids.include?(component.id) 58 | } 59 | end 60 | 61 | def dependencies_of(selected_components) 62 | all_dependencies = selected_components.map { |component| 63 | component.dependencies(:all) 64 | }.flatten.uniq 65 | 66 | Set.new(get_components(all_dependencies.map(&:target_id))) 67 | end 68 | 69 | end 70 | 71 | end 72 | -------------------------------------------------------------------------------- /lib/pipely/dependency.rb: -------------------------------------------------------------------------------- 1 | module Pipely 2 | 3 | # Represents a dependency from one Component on another 4 | # http://amzn.to/16lbBKx 5 | # 6 | class Dependency < Struct.new(:label, :target_id, :color) 7 | 8 | def color 9 | super || 'black' 10 | end 11 | 12 | end 13 | 14 | end 15 | -------------------------------------------------------------------------------- /lib/pipely/deploy.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/deploy/client' 2 | -------------------------------------------------------------------------------- /lib/pipely/deploy/bootstrap.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/bundler' 2 | require 'pipely/deploy/bootstrap_context' 3 | require 'pipely/deploy/bootstrap_registry' 4 | require 'pipely/deploy/s3_uploader' 5 | require 'active_support/core_ext/string/conversions' 6 | 7 | module Pipely 8 | module Deploy 9 | 10 | # Helps bootstrap a pipeline 11 | class Bootstrap 12 | 13 | attr_reader :gem_files, :s3_steps_path 14 | 15 | def initialize(gem_files, s3_steps_path) 16 | @gem_files = gem_files 17 | @s3_steps_path = s3_steps_path 18 | end 19 | 20 | def context(*mixins) 21 | bootstrap_mixins = BootstrapRegistry.instance.register_mixins(mixins) 22 | 23 | 
BootstrapContext.class_eval do 24 | bootstrap_mixins.each do |mixin| 25 | puts "Adding bootstrap mixin #{mixin}" 26 | include mixin.constantize 27 | end 28 | self 29 | end.new.tap do |context| 30 | context.gem_files = gem_files 31 | context.s3_steps_path = s3_steps_path 32 | end 33 | end 34 | 35 | end 36 | 37 | end 38 | end 39 | -------------------------------------------------------------------------------- /lib/pipely/deploy/bootstrap_context.rb: -------------------------------------------------------------------------------- 1 | 2 | module Pipely 3 | module Deploy 4 | 5 | # Context passed to the erb templates, providers helpers for 6 | # common bootstraping activities for emr and ec2 instances. 7 | # 8 | # bootstrap.ec2.install_gems_script 9 | # bootstrap.emr.install_gems_script 10 | # 11 | class BootstrapContext 12 | attr_accessor :gem_files, :s3_steps_path 13 | attr_reader :ec2, :emr 14 | 15 | # Context for EMR instances 16 | class EmrContext 17 | def initialize(parent) 18 | @parent = parent 19 | end 20 | 21 | def install_gems_script(&blk) 22 | @parent.install_gems_script(:hadoop_fs, &blk) 23 | end 24 | end 25 | 26 | # Context for EC2 instances 27 | class Ec2Context 28 | def initialize(parent) 29 | @parent = parent 30 | @ssh_initialized = false 31 | end 32 | 33 | def install_gems_script(&blk) 34 | @parent.install_gems_script(:awscli, &blk) 35 | end 36 | 37 | def as_root(init_ssh=true) 38 | script = "" 39 | 40 | if init_ssh && !@ssh_initialized 41 | @ssh_initialized = true 42 | script << %{ 43 | # Set up ssh access 44 | if [ ! -f ~/.ssh/id_rsa ]; then 45 | mkdir -p ~/.ssh 46 | ssh-keygen -P '' -f ~/.ssh/id_rsa 47 | cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys 48 | chmod 600 ~/.ssh/authorized_keys 49 | fi 50 | } 51 | end 52 | 53 | script << %{ 54 | # Use ssh to bypass the sudo "require tty" setting 55 | ssh -o "StrictHostKeyChecking no" -t -t ec2-user@localhost <<- EOF 56 | sudo su -; 57 | } 58 | 59 | # The yield to be run as root 60 | script << yield 61 | 62 | script << %{ 63 | # exit twice, once for su and once for ssh 64 | exit; 65 | exit; 66 | EOF 67 | } 68 | end 69 | end 70 | 71 | def initialize 72 | @emr = EmrContext.new(self) 73 | @ec2 = Ec2Context.new(self) 74 | end 75 | 76 | def fetch_command(transport) 77 | case transport.to_sym 78 | when :hadoop_fs 79 | 'hadoop fs -copyToLocal' 80 | when :awscli 81 | 'aws s3 cp' 82 | end 83 | end 84 | 85 | def install_gems_script(transport, &blk) 86 | 87 | transport_cmd = fetch_command(transport) 88 | 89 | if transport_cmd.nil? 
90 | raise "Unsupported transport: #{transport}" unless blk 91 | end 92 | 93 | script = "" 94 | @gem_files.each do |gem_file| 95 | filename = File.basename(gem_file) 96 | params = [transport_cmd, gem_file, filename] 97 | if blk 98 | command = yield(*params) 99 | else 100 | command = params.join(" ") 101 | end 102 | 103 | script << %Q[ 104 | # #{filename} 105 | #{command} 106 | gem install --force --local #{filename} --no-ri --no-rdoc 107 | ] 108 | end 109 | 110 | script 111 | end 112 | end 113 | end 114 | end 115 | -------------------------------------------------------------------------------- /lib/pipely/deploy/bootstrap_registry.rb: -------------------------------------------------------------------------------- 1 | require 'singleton' 2 | require 'active_support/core_ext/string/conversions' 3 | 4 | module Pipely 5 | module Deploy 6 | 7 | # 8 | ## Registry of Mixins to be applied to the bootstrap context 9 | # 10 | class BootstrapRegistry 11 | include Singleton 12 | 13 | def initialize 14 | @mixins = [] 15 | end 16 | 17 | class << self 18 | def register_mixins(*mixins) 19 | instance.register_mixins(*mixins) 20 | end 21 | 22 | def mixins 23 | instance.mixins 24 | end 25 | end 26 | 27 | def register_mixins(*mixins) 28 | new_mixins = [mixins].flatten.compact 29 | 30 | new_mixins.each do |mixin| 31 | begin 32 | require mixin.underscore 33 | rescue LoadError => e 34 | raise "Failed to require #{mixin} for bootstrap_contexts: #{e}" 35 | end 36 | end 37 | @mixins = (@mixins + new_mixins).uniq 38 | end 39 | 40 | def mixins 41 | @mixins 42 | end 43 | end 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /lib/pipely/deploy/client.rb: -------------------------------------------------------------------------------- 1 | require 'aws-sdk' 2 | require 'logger' 3 | require 'tempfile' 4 | require 'securerandom' 5 | require 'pipely/deploy/json_definition' 6 | 7 | module Pipely 8 | module Deploy 9 | 10 | # Client for managing deployment of rendered definitions. 11 | # 12 | class Client 13 | 14 | attr_reader :base_tags 15 | 16 | # Generic error representing failure to deploy a rendered definition. 17 | class PipelineDeployerError < RuntimeError; end 18 | 19 | def initialize(log=nil) 20 | @log = log || Logger.new(STDOUT) 21 | @aws = Aws::DataPipeline::Client.new 22 | @base_tags = { 23 | "environment" => ENV['env'], 24 | "creator" => ENV['USER'] 25 | } 26 | end 27 | 28 | def deploy_pipeline(pipeline_basename, definition = nil, &block) 29 | pipeline_name = pipeline_name(pipeline_basename) 30 | 31 | tags = base_tags.merge( 32 | "basename" => pipeline_basename, 33 | "deploy_id" => SecureRandom.uuid ) 34 | 35 | # Get a list of all existing pipelines 36 | pipeline_ids = existing_pipelines(pipeline_name) 37 | @log.info("#{pipeline_ids.count} existing pipelines: #{pipeline_ids}") 38 | 39 | # Create new pipeline 40 | created_pipeline_id = create_pipeline( 41 | pipeline_name, definition, tags, &block 42 | ) 43 | 44 | if created_pipeline_id 45 | @log.info("Created pipeline id '#{created_pipeline_id}'") 46 | 47 | # Delete old pipelines 48 | pipeline_ids.each do |pipeline_id| 49 | begin 50 | delete_pipeline(pipeline_id) 51 | @log.info("Deleted pipeline '#{pipeline_id}'") 52 | 53 | rescue PipelineDeployerError => error 54 | @log.warn(error) 55 | end 56 | end 57 | end 58 | 59 | created_pipeline_id 60 | end 61 | 62 | def existing_pipelines(pipeline_name) 63 | ids = [] 64 | marker = nil 65 | 66 | begin 67 | options = marker ? 
{ marker: marker } : {} 68 | result = @aws.list_pipelines(options) 69 | 70 | ids += result[:pipeline_id_list]. 71 | select { |p| p[:name] == pipeline_name }. 72 | map { |p| p[:id] } 73 | 74 | end while (result[:has_more_results] && marker = result[:marker]) 75 | 76 | ids 77 | end 78 | 79 | def create_pipeline(pipeline_name, definition, tags={}) 80 | created_pipeline = @aws.create_pipeline( 81 | name: pipeline_name, 82 | unique_id: tags['deploy_id'] || SecureRandom.uuid, 83 | description: "Pipely Deployed Data Pipeline", 84 | tags: base_tags.merge(tags).map do |k,v| 85 | { key: k, value: v } unless v.nil? 86 | end.compact, 87 | ) 88 | 89 | definition ||= yield(created_pipeline.pipeline_id) if block_given? 90 | 91 | response = @aws.put_pipeline_definition( 92 | pipeline_id: created_pipeline.pipeline_id, 93 | pipeline_objects: JSONDefinition.parse(definition) 94 | ) 95 | 96 | activate_pipeline(response, created_pipeline) 97 | end 98 | 99 | def activate_pipeline(response, pipeline) 100 | if response[:errored] 101 | @log.error("Failed to put pipeline definition.") 102 | @log.error(response[:validation_errors].inspect) 103 | false 104 | else 105 | @aws.activate_pipeline(pipeline_id: pipeline.pipeline_id) 106 | pipeline.pipeline_id 107 | end 108 | end 109 | 110 | def delete_pipeline(pipeline_id) 111 | @aws.delete_pipeline(pipeline_id: pipeline_id) 112 | end 113 | 114 | private 115 | 116 | def pipeline_name(basename) 117 | [ 118 | ('P' if ENV['env'] == 'production'), 119 | ENV['USER'], 120 | basename 121 | ].compact.join(':') 122 | end 123 | end 124 | end 125 | end 126 | -------------------------------------------------------------------------------- /lib/pipely/deploy/json_definition.rb: -------------------------------------------------------------------------------- 1 | require 'json' 2 | 3 | module Pipely 4 | module Deploy 5 | 6 | # The JSON definition format expected by the CLI differs from the structure 7 | # expected by the API. This class transforms a CLI-ready definition into 8 | # the pipeline object hashes expected by the API. 
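#
# For example:
#
#   JSONDefinition.parse('{"objects":[{"id":"A","type":"Ec2Resource"}]}')
#   # => [{ id: "A", name: "A",
#   #       fields: [{ key: :type, string_value: "Ec2Resource" }] }]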
9 | # 10 | class JSONDefinition 11 | def self.parse(definition) 12 | definition_objects = 13 | JSON.parse(definition, symbolize_names: true)[:objects] 14 | definition_objects.map { |object| new(object).to_api } 15 | end 16 | 17 | def initialize(object) 18 | @json_fields = object.clone 19 | @id = @json_fields.delete(:id) 20 | @name = @json_fields.delete(:name) || @id 21 | end 22 | 23 | def to_api 24 | { 25 | id: @id, 26 | name: @name, 27 | fields: fields 28 | } 29 | end 30 | 31 | private 32 | 33 | def fields 34 | @json_fields.map{|k,v| field_for_kv(k,v)}.flatten 35 | end 36 | 37 | def field_for_kv(key, value) 38 | if value.is_a?(Hash) 39 | { key: key, ref_value: value[:ref] } 40 | 41 | elsif value.is_a?(Array) 42 | value.map { |subvalue| field_for_kv(key, subvalue) } 43 | 44 | else 45 | { key: key, string_value: value } 46 | 47 | end 48 | end 49 | end 50 | 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /lib/pipely/deploy/s3_uploader.rb: -------------------------------------------------------------------------------- 1 | require 'digest/md5' 2 | 3 | module Pipely 4 | module Deploy 5 | 6 | # 7 | # Manage syncing of local files to a particular S3 path 8 | # 9 | class S3Uploader 10 | 11 | attr_reader :bucket_name 12 | attr_reader :s3_path 13 | 14 | def initialize(s3_bucket, s3_path) 15 | @s3_bucket = s3_bucket 16 | @bucket_name = s3_bucket.name 17 | @s3_path = s3_path 18 | end 19 | 20 | def s3_file_path(file) 21 | filename = File.basename(file) 22 | File.join(@s3_path, filename) 23 | end 24 | 25 | def s3_urls(files) 26 | files.map do |file| 27 | File.join("s3://", @s3_bucket.name, s3_file_path(file) ) 28 | end 29 | end 30 | 31 | def upload(files) 32 | files.each do |file| 33 | upload_file(file) 34 | end 35 | end 36 | 37 | # 38 | # Upload file to S3 unless ETAGs already match. 39 | # 40 | def upload_file(file) 41 | target_path = s3_file_path(file) 42 | s3_object = @s3_bucket.object(target_path) 43 | 44 | content = File.read(file) 45 | digest = Digest::MD5.hexdigest(content) 46 | 47 | if s3_object.exists? 
&& (digest == s3_object.etag.gsub('"', '')) 48 | puts "skipping #{file} to #{target_path} (ETAG matches)" 49 | else 50 | puts "uploading #{file} to #{target_path}" 51 | s3_object.put(body: content) 52 | end 53 | end 54 | 55 | end 56 | 57 | end 58 | end 59 | -------------------------------------------------------------------------------- /lib/pipely/graph_builder.rb: -------------------------------------------------------------------------------- 1 | require 'json' 2 | require 'graphviz' 3 | 4 | module Pipely 5 | 6 | # Builds a GraphViz graph from a set of Components and their Dependencies 7 | class GraphBuilder 8 | 9 | def initialize(graph=nil) 10 | @graph = graph || GraphViz.new(:G, :type => :digraph) 11 | end 12 | 13 | def build(components) 14 | add_nodes(components) 15 | add_edges(components) 16 | @graph 17 | end 18 | 19 | private 20 | 21 | # Represent Components as nodes on the graph 22 | def add_nodes(components) 23 | components.each do |component| 24 | @graph.add_nodes(component.id, component.graphviz_options) 25 | end 26 | end 27 | 28 | # Represent Dependencies as edges on the graph 29 | def add_edges(components) 30 | components.each do |component| 31 | component.dependencies.each do |dependency| 32 | add_edge(component, dependency) 33 | end 34 | end 35 | end 36 | 37 | def add_edge(component, dependency) 38 | options = { 39 | :label => dependency.label, 40 | :color => dependency.color, 41 | } 42 | 43 | options[:dir] = 'back' if ('input' == dependency.label) 44 | 45 | if 'output' == dependency.label 46 | @graph.add_edges( 47 | dependency.target_id, 48 | component.id, 49 | options 50 | ) 51 | else 52 | @graph.add_edges( 53 | component.id, 54 | dependency.target_id, 55 | options 56 | ) 57 | end 58 | end 59 | 60 | end 61 | 62 | end 63 | 64 | -------------------------------------------------------------------------------- /lib/pipely/live_pipeline.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/runs_report' 2 | 3 | module Pipely 4 | 5 | # Represent a pipeline that has been deployed to AWS DataPipeline 6 | class LivePipeline 7 | attr_reader :pipeline_id 8 | 9 | def initialize(pipeline_id) 10 | @pipeline_id = pipeline_id 11 | 12 | @definition_json = definition(pipeline_id) 13 | @task_states_by_scheduled_start = task_states_by_scheduled_start 14 | 15 | unless @definition_json 16 | raise "No definition found for #{pipeline_id}" 17 | end 18 | 19 | if @task_states_by_scheduled_start.empty?
20 | raise "No runs found for #{pipeline_id}" 21 | end 22 | end 23 | 24 | def print_runs_report 25 | RunsReport.new(@task_states_by_scheduled_start).print 26 | end 27 | 28 | def render_latest_graph(output_path=nil) 29 | latest_start = @task_states_by_scheduled_start.keys.max 30 | task_states = @task_states_by_scheduled_start[latest_start] 31 | render_graph(latest_start, task_states, output_path) 32 | end 33 | 34 | def render_graphs(output_path=nil) 35 | @task_states_by_scheduled_start.map do |start, task_states| 36 | render_graph(start, task_states, output_path) 37 | end 38 | end 39 | 40 | private 41 | 42 | def data_pipeline 43 | @data_pipeline ||= Aws::DataPipeline::Client.new 44 | end 45 | 46 | def render_graph(start, task_states, output_path) 47 | utc_time = Time.now.to_i 48 | formatted_start = start.gsub(/[:-]/, '').sub('T', '-') 49 | 50 | output_base = "#{@pipeline_id}-#{formatted_start}-#{utc_time}.png" 51 | filename = File.join((output_path || 'graphs'), output_base) 52 | 53 | Pipely.draw(@definition_json, filename, task_states) 54 | end 55 | 56 | def definition(pipeline_id) 57 | objects = data_pipeline.get_pipeline_definition(pipeline_id: pipeline_id) 58 | { objects: flatten_pipeline_objects(objects.pipeline_objects) }.to_json 59 | end 60 | 61 | def task_states_by_scheduled_start 62 | task_states_by_scheduled_start = {} 63 | 64 | all_instances.each do |pipeline_object| 65 | component_id = status = scheduled_start = nil 66 | 67 | pipeline_object.fields.each do |field| 68 | case field.key 69 | when '@componentParent' 70 | component_id = field.ref_value 71 | when '@status' 72 | status = field.string_value 73 | when '@scheduledStartTime' 74 | scheduled_start = field.string_value 75 | end 76 | end 77 | 78 | task_states_by_scheduled_start[scheduled_start] ||= {} 79 | task_states_by_scheduled_start[scheduled_start][component_id] = { 80 | execution_state: status 81 | } 82 | end 83 | 84 | task_states_by_scheduled_start 85 | end 86 | 87 | def all_instances 88 | pipeline_objects = [] 89 | marker = nil 90 | 91 | begin 92 | result = data_pipeline.query_objects( 93 | pipeline_id: pipeline_id, 94 | sphere: "INSTANCE", 95 | marker: marker, 96 | ) 97 | 98 | marker = result.marker 99 | 100 | instance_details = data_pipeline.describe_objects( 101 | pipeline_id: pipeline_id, 102 | object_ids: result.ids 103 | ) 104 | 105 | pipeline_objects += instance_details.pipeline_objects 106 | 107 | end while (result.has_more_results && marker) 108 | 109 | pipeline_objects 110 | end 111 | 112 | def flatten_pipeline_objects(objects) 113 | objects.each_with_object([]) do |object, result| 114 | h = { 115 | id: object.id, 116 | name: object.name, 117 | } 118 | 119 | object.fields.each do |field| 120 | k = field.key 121 | if field.ref_value 122 | h[k] ||= [] 123 | h[k] << { ref: field.ref_value } 124 | else 125 | h[k] = field.string_value 126 | end 127 | end 128 | 129 | result << h 130 | end 131 | end 132 | end 133 | end 134 | -------------------------------------------------------------------------------- /lib/pipely/options.rb: -------------------------------------------------------------------------------- 1 | require 'optparse' 2 | 3 | module Pipely 4 | 5 | # Options for running the CLI 6 | # 7 | class Options 8 | 9 | attr_accessor :pipeline_id, :input_path, :output_path, 10 | :verbose, :automatic_open, :json_output, :latest_run 11 | 12 | def self.parse 13 | options = Pipely::Options.new 14 | 15
| OptionParser.new do |opts| 16 | opts.banner = "Usage: pipely [options]" 17 | 18 | opts.on("-p", "--pipeline-id PIPELINE_ID", 19 | "ID of a live pipeline to visualize with live statuses") do |id| 20 | options.pipeline_id = id 21 | end 22 | 23 | opts.on("-l", "--latest", "Graph only the latest run") do |latest| 24 | options.latest_run = latest 25 | end 26 | 27 | opts.on("-i", "--input PATH", 28 | "Path to a JSON pipeline definition file to visualize") do |input| 29 | options.input_path = input 30 | end 31 | 32 | opts.on("-o", "--output PATH", 33 | "Local or S3 path to write Graphviz PNG file(s)") do |output| 34 | options.output_path = output 35 | end 36 | 37 | opts.on("-j", "--json", "Write STDOUT formatted as JSON") do |json| 38 | options.json_output = json 39 | end 40 | end.parse! 41 | 42 | options 43 | end 44 | 45 | end 46 | 47 | end 48 | -------------------------------------------------------------------------------- /lib/pipely/pipeline_date_time/pipeline_date.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | module Pipely 3 | module PipelineDateTime 4 | # Encapsulates AWS pipeline date 5 | # 6 | class PipelineDate 7 | DEFAULT_DAY_FORMAT = 'YYYY/MM/dd' 8 | DEFAULT_MONTH_FORMAT = 'YYYY/MM' 9 | DEFAULT_YEAR_FORMAT = 'YYYY' 10 | 11 | class << self 12 | def day_format=(day_format) 13 | @day_format = day_format 14 | end 15 | 16 | def day_format 17 | @day_format || DEFAULT_DAY_FORMAT 18 | end 19 | 20 | def month_format=(month_format) 21 | @month_format = month_format 22 | end 23 | 24 | def month_format 25 | @month_format || DEFAULT_MONTH_FORMAT 26 | end 27 | 28 | def year_format=(year_format) 29 | @year_format = year_format 30 | end 31 | 32 | def year_format 33 | @year_format || DEFAULT_YEAR_FORMAT 34 | end 35 | end 36 | 37 | def initialize(target_date, days_back) 38 | days_back = days_back.to_i 39 | @date_expression = case 40 | when days_back > 0 41 | "minusDays(#{target_date}, #{days_back})" 42 | when days_back == 0 43 | target_date 44 | else 45 | "plusDays(#{target_date}, #{-days_back})" 46 | end 47 | end 48 | 49 | def day 50 | "\#{format(#{@date_expression}, \"#{PipelineDate.day_format}\")}" 51 | end 52 | 53 | def month 54 | "\#{format(#{@date_expression}, "\ 55 | "\"#{PipelineDate.month_format}\")}/[0-9]+" 56 | end 57 | 58 | def year 59 | "\#{format(#{@date_expression}, "\ 60 | "\"#{PipelineDate.year_format}\")}/[0-9]+/[0-9]+" 61 | end 62 | end 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /lib/pipely/pipeline_date_time/pipeline_date_pattern.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | require 'pipely/pipeline_date_time/pipeline_day_range' 3 | require 'pipely/pipeline_date_time/pipeline_month_range' 4 | require 'pipely/pipeline_date_time/pipeline_year_range' 5 | 6 | module Pipely 7 | module PipelineDateTime 8 | # Mixin for constructing compact date pattern selections 9 | # 10 | module PipelineDatePattern 11 | def date_pattern 12 | selection.target_all_time ? 
'.*' : any_string(date_pattern_parts) 13 | end 14 | 15 | private 16 | 17 | def date_pattern_parts 18 | day_range.exclude(month_range.start, month_range.end) 19 | month_range.exclude(year_range.start, year_range.end) 20 | day_range.days + month_range.months + year_range.years 21 | end 22 | 23 | def day_range 24 | @day_range ||= PipelineDayRange.new(selection.target_date, num_days, 0) 25 | end 26 | 27 | def month_range 28 | @month_range ||= PipelineMonthRange.new(selection.target_date, num_days, 29 | 0) 30 | end 31 | 32 | def year_range 33 | @year_range ||= PipelineYearRange.new(selection.target_date, num_days, 34 | 0) 35 | end 36 | 37 | def num_days 38 | selection.num_days_back.to_i 39 | end 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /lib/pipely/pipeline_date_time/pipeline_date_range_base.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | require 'pipely/pipeline_date_time/pipeline_date' 3 | require 'set' 4 | module Pipely 5 | module PipelineDateTime 6 | # Base class for pipeline date ranges 7 | # 8 | class PipelineDateRangeBase 9 | attr_reader :days_back 10 | 11 | def initialize(target_date, days_back_start, days_back_end) 12 | @target_date = target_date 13 | @days_back_start = days_back_start 14 | @days_back_end = days_back_end 15 | @days_back = (days_back_end..days_back_start).to_set 16 | end 17 | 18 | def start 19 | @days_back_start 20 | end 21 | 22 | def end 23 | @days_back_end 24 | end 25 | 26 | def exclude(days_back_start, days_back_end) 27 | return if days_back_start < 0 28 | return if days_back_end < 0 29 | return if days_back_start < days_back_end # larger days_back means an earlier date 30 | (days_back_end..days_back_start).each do |days_back| 31 | @days_back.delete days_back 32 | end 33 | end 34 | 35 | private 36 | 37 | def pipeline_dates 38 | @pipeline_dates ||= @days_back.map do |days_back| 39 | PipelineDate.new(@target_date, days_back) 40 | end 41 | end 42 | end 43 | end 44 | end 45 | -------------------------------------------------------------------------------- /lib/pipely/pipeline_date_time/pipeline_day_range.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | require 'pipely/pipeline_date_time/pipeline_date_range_base' 3 | 4 | module Pipely 5 | module PipelineDateTime 6 | # Class that represents a range of individual pipeline days 7 | # 8 | class PipelineDayRange < PipelineDateRangeBase 9 | def days 10 | @days ||= pipeline_dates.map { |pd| pd.day } 11 | end 12 | end 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /lib/pipely/pipeline_date_time/pipeline_month_range.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | require 'pipely/pipeline_date_time/pipeline_date_range_base' 3 | 4 | module Pipely 5 | module PipelineDateTime 6 | # Class that represents a range of individual pipeline months 7 | # 8 | class PipelineMonthRange < PipelineDateRangeBase 9 | MINIMUM_MONTH_OFFSET = 30 # The month of x+/-30 will never add extra days 10 | MONTH_INTERVAL = 28 # We never miss a month by taking every 28 days 11 | 12 | attr_reader :start, :end 13 | 14 | def initialize(target_date, days_back_start, days_back_end) 15 | @target_date = target_date 16 | @start = days_back_start - MINIMUM_MONTH_OFFSET 17 | @end = days_back_end + MINIMUM_MONTH_OFFSET 18 | @days_back = (@end..@start).step(MONTH_INTERVAL).to_set 19 | end 20
| 21 | def months 22 | @months ||= pipeline_dates.map { |pd| pd.month } 23 | end 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /lib/pipely/pipeline_date_time/pipeline_year_range.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | require 'pipely/pipeline_date_time/pipeline_date_range_base' 3 | 4 | module Pipely 5 | module PipelineDateTime 6 | # Class that represents a range of individual pipeline years 7 | # 8 | class PipelineYearRange < PipelineDateRangeBase 9 | DAYS_IN_YEAR = 365 10 | 11 | attr_reader :start, :end 12 | 13 | def initialize(target_date, days_back_start, days_back_end) 14 | @target_date = target_date 15 | @start = days_back_start - DAYS_IN_YEAR 16 | @end = days_back_end + DAYS_IN_YEAR 17 | @days_back = (@end..@start).step(DAYS_IN_YEAR).to_set 18 | end 19 | 20 | def years 21 | @years ||= pipeline_dates.map { |pd| pd.year } 22 | end 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /lib/pipely/reference_list.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/dependency' 2 | 3 | module Pipely 4 | 5 | # A list of references to Components for managing dependencies 6 | # 7 | class ReferenceList 8 | 9 | def initialize(input) 10 | @raw_references = [input].flatten.compact 11 | end 12 | 13 | def build_dependencies(label) 14 | @raw_references.map{|h| Dependency.new(label, h['ref'])} 15 | end 16 | 17 | def to_json(options={}, depth=0) 18 | if 1 == @raw_references.count 19 | @raw_references.first.to_json(options) 20 | else 21 | @raw_references.to_json(options) 22 | end 23 | end 24 | 25 | def present? 26 | !@raw_references.empty? 27 | end 28 | 29 | end 30 | 31 | end 32 | 33 | -------------------------------------------------------------------------------- /lib/pipely/runs_report.rb: -------------------------------------------------------------------------------- 1 | module Pipely 2 | 3 | # Prints a CLI report of the execution status of a live pipeline 4 | class RunsReport < Struct.new(:task_states_by_scheduled_start) 5 | 6 | def print 7 | return false unless $stdout.tty? 8 | 9 | task_states_by_scheduled_start.each do |scheduled_start, task_states| 10 | task_states.to_a.sort_by(&:first).each do |task_name, attributes| 11 | current_state = attributes[:execution_state] 12 | 13 | puts task_name.ljust(55) + 14 | "scheduled_start: #{scheduled_start}\t\t" + 15 | "current_state: #{current_state}" 16 | end 17 | end 18 | end 19 | 20 | end 21 | end 22 | 23 | -------------------------------------------------------------------------------- /lib/pipely/s3_writer.rb: -------------------------------------------------------------------------------- 1 | require 'aws-sdk' 2 | 3 | module Pipely 4 | 5 | # Writes content from a String to an S3 path 6 | # 7 | class S3Writer 8 | 9 | def initialize(s3_path) 10 | uri = URI.parse(s3_path) 11 | @host, @path = uri.host, uri.path.gsub(/^\//,'') 12 | end 13 | 14 | def write(content) 15 | s3_bucket = Aws::S3::Bucket.new(@host) 16 | s3_object = s3_bucket.object(@path) 17 | s3_object.put(body: content, acl: 'public-read') 18 | s3_object.public_url 19 | end 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /lib/pipely/shared_examples.rb: -------------------------------------------------------------------------------- 1 | # Shared examples to be used by projects that use pipely.
2 | # 3 | 4 | shared_examples "a renderable template" do |environment, config| 5 | let(:rendered_json) { 6 | Pipely::Build.build_definition( 7 | subject, 8 | environment, 9 | config 10 | ).to_json 11 | } 12 | 13 | it "renders without error" do 14 | expect(rendered_json).to be 15 | end 16 | 17 | it "produces valid JSON" do 18 | expect(JSON.parse(rendered_json)).to be 19 | end 20 | 21 | it "does not contain objects with duplicate ids" do 22 | objects = JSON.parse(rendered_json)['objects'] 23 | distinct_ids = objects.map{|h| h['id']}.uniq.compact 24 | 25 | expect(objects.count).to eq(distinct_ids.count) 26 | end 27 | 28 | it "does not generate SnsAlarm subjects that are over 100 characters" do 29 | objects = JSON.parse(rendered_json)['objects'] 30 | sns_alarms = objects.select{|h| h['type'] == 'SnsAlarm'} 31 | 32 | max_object_id = objects.map{|h| h['id']}.max_by(&:length) 33 | max_attempt_id = max_object_id + "_2014-01-01T00:00:00_Attempt=1" 34 | 35 | sns_alarms.each do |h| 36 | # NOTE: This currently only handles the interpolation we use at Swipely. 37 | # TODO: Support local evaluation of any valid expression. 38 | interpolated_subject = h['subject'].sub('#{node.name}', max_attempt_id) 39 | expect( interpolated_subject ).to have_at_most(100).chars 40 | end 41 | end 42 | 43 | end 44 | -------------------------------------------------------------------------------- /lib/pipely/tasks.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/tasks/upload_steps' 2 | require 'pipely/tasks/deploy' 3 | require 'pipely/tasks/upload_pipeline_as_gem' 4 | require 'pipely/tasks/graph' 5 | require 'pipely/tasks/definition' 6 | 7 | module Pipely 8 | module Tasks 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /lib/pipely/tasks/definition.rb: -------------------------------------------------------------------------------- 1 | require 'rake' 2 | require 'rake/tasklib' 3 | require 'pipely' 4 | require 'json' 5 | require 'fileutils' 6 | 7 | module Pipely 8 | module Tasks 9 | class Definition < ::Rake::TaskLib 10 | include ::Rake::DSL if defined?(::Rake::DSL) 11 | 12 | # Name of task. 13 | # 14 | # default: 15 | # :definition 16 | attr_accessor :name 17 | 18 | # Path where rendered definitions are written. 19 | # 20 | # default: 21 | # "definitions" 22 | attr_accessor :path 23 | 24 | # Pipeline definition instance 25 | attr_accessor :definition 26 | 27 | # Use verbose output. If this is set to true, the task will print the 28 | # name of the definition file as it is generated. 29 | # 30 | # default: 31 | # true 32 | attr_accessor :verbose 33 | 34 | def initialize(*args, &task_block) 35 | setup_ivars(args) 36 | 37 | # First non-name parameter allows overriding the configured scheduler.
38 | args.unshift(:scheduler) 39 | 40 | directory path 41 | 42 | desc "Generates the pipeline definition file" 43 | task name, *args do |_, task_args| 44 | RakeFileUtils.send(:verbose, verbose) do 45 | if task_block 46 | task_block.call(*[self, task_args].slice(0, task_block.arity)) 47 | end 48 | 49 | if scheduler_override = task_args[:scheduler] 50 | definition.config[:scheduler] = scheduler_override 51 | end 52 | 53 | run_task verbose 54 | end 55 | end 56 | end 57 | 58 | def setup_ivars(args) 59 | @name = args.shift || :definition 60 | @verbose = true 61 | @path = "definitions" 62 | end 63 | 64 | def run_task(verbose) 65 | puts "Generating #{target_filename}" if verbose 66 | 67 | json = definition.to_json 68 | 69 | unless ENV['UGLY'] 70 | json = JSON.pretty_generate(JSON.parse(json)) 71 | end 72 | 73 | File.open(target_filename, 'w') do |file| 74 | file.write(json) 75 | end 76 | end 77 | 78 | def target_filename 79 | "#{path}/#{definition.base_filename}.json" 80 | end 81 | 82 | end 83 | end 84 | end 85 | -------------------------------------------------------------------------------- /lib/pipely/tasks/deploy.rb: -------------------------------------------------------------------------------- 1 | require 'rake' 2 | require 'rake/tasklib' 3 | require 'pipely/deploy' 4 | 5 | module Pipely 6 | module Tasks 7 | class Deploy < ::Rake::TaskLib 8 | include ::Rake::DSL if defined?(::Rake::DSL) 9 | 10 | # Name of task. 11 | # 12 | # default: 13 | # :deploy 14 | attr_accessor :name 15 | 16 | # Pipeline definition instance 17 | attr_accessor :definition 18 | 19 | # Use verbose output. If this is set to true, the task will print the 20 | # local and remote paths of each step file it uploads to S3. 21 | # 22 | # default: 23 | # true 24 | attr_accessor :verbose 25 | 26 | def initialize(*args, &task_block) 27 | setup_ivars(args) 28 | 29 | # First non-name parameter allows overriding the configured scheduler. 30 | args.unshift(:scheduler) 31 | 32 | desc "Deploy pipeline" unless ::Rake.application.last_comment 33 | 34 | task name, *args do |_, task_args| 35 | RakeFileUtils.send(:verbose, verbose) do 36 | if task_block 37 | task_block.call(*[self, task_args].slice(0, task_block.arity)) 38 | end 39 | 40 | if scheduler_override = task_args[:scheduler] 41 | definition.config[:scheduler] = scheduler_override 42 | end 43 | 44 | run_task verbose 45 | end 46 | end 47 | end 48 | 49 | def setup_ivars(args) 50 | @name = args.shift || :deploy 51 | @verbose = true 52 | end 53 | 54 | def run_task(verbose) 55 | Rake::Task["upload_steps"].invoke 56 | 57 | Pipely::Deploy::Client.new 58 | .deploy_pipeline(definition.pipeline_name) do |pipeline_id| 59 | definition.pipeline_id = pipeline_id 60 | definition.to_json 61 | end 62 | end 63 | 64 | end 65 | end 66 | end 67 | -------------------------------------------------------------------------------- /lib/pipely/tasks/graph.rb: -------------------------------------------------------------------------------- 1 | require 'rake' 2 | require 'rake/tasklib' 3 | require 'pipely' 4 | 5 | module Pipely 6 | module Tasks 7 | class Graph < ::Rake::TaskLib 8 | include ::Rake::DSL if defined?(::Rake::DSL) 9 | 10 | # Name of task. 11 | # 12 | # default: 13 | # :graph 14 | attr_accessor :name 15 | 16 | # Path to write graph images to. 17 | # 18 | # default: 19 | # "graphs" 20 | attr_accessor :path 21 | 22 | # Pipeline definition instance 23 | attr_accessor :definition 24 | 25 | # Use verbose output. 
If this is set to true, the task will print the 26 | # name of each graph image as it is generated. 27 | # 28 | # default: 29 | # true 30 | attr_accessor :verbose 31 | 32 | def initialize(*args, &task_block) 33 | setup_ivars(args) 34 | 35 | # create the `path` directory if it doesn't exist 36 | directory path 37 | 38 | namespace name do 39 | task :full => path do |_, task_args| 40 | RakeFileUtils.send(:verbose, verbose) do 41 | if task_block 42 | task_block.call(*[self, task_args].slice(0, task_block.arity)) 43 | end 44 | 45 | run_task verbose 46 | end 47 | end 48 | 49 | task :open => :full do 50 | `open #{target_filename}` 51 | end 52 | end 53 | 54 | desc "Graphs the full pipeline definition using Graphviz" 55 | task name => "#{name}:full" 56 | end 57 | 58 | def setup_ivars(args) 59 | @name = args.shift || :graph 60 | @verbose = true 61 | @path = "graphs" 62 | end 63 | 64 | def run_task(verbose) 65 | puts "Generating #{target_filename}" if verbose 66 | Pipely.draw(definition.to_json, target_filename) 67 | end 68 | 69 | def target_filename 70 | "#{path}/#{definition.base_filename}.png" 71 | end 72 | 73 | end 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /lib/pipely/tasks/upload_pipeline_as_gem.rb: -------------------------------------------------------------------------------- 1 | require 'rake' 2 | require 'rake/tasklib' 3 | require 'aws-sdk' 4 | require 'erubis' 5 | require 'pipely/deploy/bootstrap' 6 | 7 | module Pipely 8 | module Tasks 9 | class UploadPipelineAsGem < ::Rake::TaskLib 10 | include ::Rake::DSL if defined?(::Rake::DSL) 11 | 12 | # Name of this rake task 13 | attr_accessor :name 14 | 15 | attr_accessor :bucket_name 16 | attr_accessor :s3_steps_path 17 | attr_accessor :s3_gems_path 18 | attr_accessor :config 19 | attr_accessor :templates 20 | attr_accessor :verbose 21 | def initialize(*args, &task_block) 22 | setup_ivars(args) 23 | 24 | task name, *args do |_, task_args| 25 | RakeFileUtils.send(:verbose, verbose) do 26 | if task_block 27 | task_block.call(*[self, task_args].slice(0, task_block.arity)) 28 | end 29 | 30 | run_task verbose 31 | end 32 | end 33 | 34 | Rake::Task["upload_steps"].enhance [name] 35 | end 36 | 37 | def setup_ivars(args) 38 | @name = args.shift || 'deploy:upload_pipeline_as_gem' 39 | @verbose = true 40 | @templates = Dir.glob("templates/*.erb") 41 | end 42 | 43 | def run_task(verbose) 44 | s3_gem_paths = upload_gems 45 | context = build_bootstrap_context(s3_gem_paths) 46 | 47 | templates.each do |erb_file| 48 | upload_filename = File.basename(erb_file).sub( /\.erb$/, '' ) 49 | 50 | # Exclude the pipeline.json 51 | if upload_filename == 'pipeline.json' 52 | next 53 | end 54 | 55 | template_erb = Erubis::Eruby.new( File.read(erb_file) ) 56 | upload_to_s3( upload_filename, template_erb.result(context) ) 57 | end 58 | end 59 | 60 | private 61 | def s3_bucket 62 | @s3_bucket ||= Aws::S3::Bucket.new(@bucket_name) 63 | end 64 | 65 | def upload_gems 66 | pipeline_gems = Pipely::Bundler.gem_files 67 | s3_uploader = Pipely::Deploy::S3Uploader.new(s3_bucket, s3_gems_path) 68 | s3_uploader.upload(pipeline_gems.values) 69 | s3_uploader.s3_urls(pipeline_gems.values) 70 | end 71 | 72 | def build_bootstrap_context(s3_gems) 73 | bootstrap_helper = Pipely::Deploy::Bootstrap.new(s3_gems, s3_steps_path) 74 | 75 | context = bootstrap_helper.context(config['bootstrap_mixins']) 76 | 77 | # erb context 78 | { 79 | bootstrap: context, 80 | config: config 81 | } 82 | end 83 | 84 | def upload_to_s3( upload_filename, body ) 85 |
s3_dest = File.join(@s3_steps_path, upload_filename) 86 | puts "uploading #{s3_dest}" if verbose 87 | s3_bucket.object(s3_dest).put(body: body) 88 | end 89 | end 90 | end 91 | end 92 | -------------------------------------------------------------------------------- /lib/pipely/tasks/upload_steps.rb: -------------------------------------------------------------------------------- 1 | require 'rake' 2 | require 'rake/tasklib' 3 | require 'pipely/deploy' 4 | module Pipely 5 | module Tasks 6 | class UploadSteps < ::Rake::TaskLib 7 | include ::Rake::DSL if defined?(::Rake::DSL) 8 | 9 | # Name of task. 10 | # 11 | # default: 12 | # :upload_steps 13 | attr_accessor :name 14 | 15 | # Local path to where the step files are. 16 | # 17 | # default: 18 | # "steps" 19 | attr_accessor :local_path 20 | 21 | # Name of S3 bucket to upload steps to. 22 | attr_accessor :s3_bucket_name 23 | 24 | # Path within S3 bucket to upload steps to. 25 | attr_accessor :s3_path 26 | 27 | # Use verbose output. If this is set to true, the task will print the 28 | # local and remote paths of each step file it uploads to S3. 29 | # 30 | # default: 31 | # true 32 | attr_accessor :verbose 33 | 34 | def initialize(*args, &task_block) 35 | setup_ivars(args) 36 | 37 | unless ::Rake.application.last_comment 38 | desc "Upload Data Pipeline steps to S3" 39 | end 40 | 41 | task name, *args do |_, task_args| 42 | RakeFileUtils.send(:verbose, verbose) do 43 | if task_block 44 | task_block.call(*[self, task_args].slice(0, task_block.arity)) 45 | end 46 | 47 | run_task verbose 48 | end 49 | end 50 | end 51 | 52 | def setup_ivars(args) 53 | @name = args.shift || :upload_steps 54 | @verbose = true 55 | @local_path = "steps" 56 | end 57 | 58 | def run_task(verbose) 59 | with_bucket do |bucket| 60 | s3_uploader = Pipely::Deploy::S3Uploader.new(bucket, s3_path) 61 | s3_uploader.upload(step_files) 62 | end 63 | end 64 | 65 | private 66 | 67 | def with_bucket 68 | s3 = Aws::S3::Resource.new 69 | bucket = s3.bucket(s3_bucket_name) 70 | 71 | if bucket.exists? 72 | yield(bucket) 73 | else 74 | raise "Couldn't find S3 bucket '#{s3_bucket_name}'" 75 | end 76 | end 77 | 78 | def step_files 79 | FileList.new(File.join(local_path, "**", "*")).reject { |fname| 80 | File.directory?( fname ) 81 | } 82 | end 83 | 84 | end 85 | end 86 | end 87 | -------------------------------------------------------------------------------- /lib/pipely/version.rb: -------------------------------------------------------------------------------- 1 | module Pipely 2 | VERSION = '0.14.1' unless defined?(::Pipely::VERSION) 3 | end 4 | -------------------------------------------------------------------------------- /pipely.gemspec: -------------------------------------------------------------------------------- 1 | $:.push File.expand_path("../lib", __FILE__) 2 | 3 | require "pipely/version" 4 | 5 | Gem::Specification.new do |s| 6 | s.name = "pipely" 7 | s.version = Pipely::VERSION 8 | s.authors = ["Matt Gillooly"] 9 | s.email = ["matt@swipely.com"] 10 | s.homepage = "http://github.com/swipely/pipely" 11 | s.summary = "Generate dependency graphs from pipeline definitions."
12 | s.license = 'MIT' 13 | 14 | s.files = Dir["{lib}/**/*"] + ["Rakefile", "README.md"] 15 | s.test_files = Dir["spec/**/*"] 16 | 17 | s.add_dependency "ruby-graphviz" 18 | s.add_dependency "rake" 19 | s.add_dependency "virtus", "~>1.0.0" 20 | s.add_dependency "aws-sdk", "~>2.0" 21 | s.add_dependency "unf" 22 | s.add_dependency "activesupport" 23 | s.add_dependency "erubis" 24 | s.add_dependency 'pathology', '~> 0.1.0' 25 | s.add_development_dependency 'safe_yaml', '~> 1.0.4' 26 | s.add_development_dependency "rspec", "~>2.14.0" 27 | s.add_development_dependency "cane" 28 | s.add_development_dependency "timecop" 29 | s.add_development_dependency "vcr" 30 | s.add_development_dependency "webmock" 31 | s.add_development_dependency "pry" 32 | 33 | s.executables << 'pipely' 34 | end 35 | -------------------------------------------------------------------------------- /spec/fixtures/bootstrap_contexts/green.rb: -------------------------------------------------------------------------------- 1 | module Fixtures 2 | module BootstrapContexts 3 | module Green 4 | def green 5 | "green" 6 | end 7 | end 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /spec/fixtures/bootstrap_contexts/simple.rb: -------------------------------------------------------------------------------- 1 | module Fixtures 2 | module BootstrapContexts 3 | module Simple 4 | def simple 5 | "simple" 6 | end 7 | end 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /spec/fixtures/templates/bootstrap.sh.erb: -------------------------------------------------------------------------------- 1 | one 2 | two 3 | three 4 | <%= bootstrap.simple %> 5 | -------------------------------------------------------------------------------- /spec/fixtures/vcr_cassettes/build_and_upload_gems.yml: -------------------------------------------------------------------------------- 1 | --- 2 | http_interactions: 3 | - request: 4 | method: put 5 | uri: https://a-test-bucket.s3.amazonaws.com/test_path/gems/pipely-0.7.0.gem 6 | body: 7 | encoding: UTF-8 8 | string: !binary |- 9 | bWV0YWRhdGEuZ3oAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 10 | headers: 11 | Content-Type: 12 | - '' 13 | Accept-Encoding: 14 | - '' 15 | Content-Length: 16 | - '21504' 17 | User-Agent: 18 | - aws-sdk-ruby/1.50.0 ruby/2.0.0 x86_64-darwin13.3.0 19 | Date: 20 | - Wed, 10 Sep 2014 00:58:35 GMT 21 | Authorization: 22 | - AWS 123 23 | Accept: 24 | - '*/*' 25 | response: 26 | status: 27 | code: 200 28 | message: OK 29 | headers: 30 | X-Amz-Id-2: 31 | - 123 32 | X-Amz-Request-Id: 33 | - 123 34 | Date: 35 | - Wed, 10 Sep 2014 00:58:36 GMT 36 | Etag: 37 | - '"123"' 38 | Content-Length: 39 | - '0' 40 | Server: 41 | - AmazonS3 42 | body: 43 | encoding: UTF-8 44 | string: '' 45 | http_version: 46 | recorded_at: Wed, 10 Sep 2014 00:58:35 GMT 47 | recorded_with: VCR 2.9.2 48 | -------------------------------------------------------------------------------- /spec/lib/pipely/build/daily_scheduler_spec.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/build/daily_scheduler' 2 | require 'timecop' 3 | describe Pipely::Build::DailyScheduler do 4 | 5 | let(:start_time) { "11:00:00" } 6 | subject { described_class.new(start_time) } 7 | 8 | describe "#period" do 9 | it "is '24 hours'" do 10 | expect(subject.period).to eq('24 hours') 11 | end 12 | end 13 | 14 | context "if the start time is garbage" do 15 | let(:start_time) { "0ksnsnk" } 16 | it 
'raises an error' do 17 | expect { described_class.new(start_time) }.to raise_exception ArgumentError 18 | end 19 | end 20 | 21 | describe "#start_date_time" do 22 | context "if the start time is 11:00:00 UTC" do 23 | let(:start_time) { "11:00:00" } 24 | it "chooses the start time tomorrow when run after it" do 25 | Timecop.freeze(Time.utc(2013, 6, 13, 16, 12, 30)) do 26 | expect(subject.start_date_time).to eq("2013-06-14T11:00:00") 27 | end 28 | end 29 | 30 | it "chooses the start time today when run before it" do 31 | Timecop.freeze(Time.utc(2013, 6, 13, 4, 12, 30)) do 32 | expect(subject.start_date_time).to eq("2013-06-13T11:00:00") 33 | end 34 | end 35 | end 36 | 37 | context "if the start time is badly formatted" do 38 | let(:start_time) { "9:00:" } 39 | it "chooses the start time today when run before it" do 40 | Timecop.freeze(Time.utc(2013, 6, 13, 4, 12, 30)) do 41 | expect(subject.start_date_time).to eq("2013-06-13T09:00:00") 42 | end 43 | end 44 | it "chooses the start time tomorrow when run after it" do 45 | Timecop.freeze(Time.utc(2013, 6, 13, 11, 12, 30)) do 46 | expect(subject.start_date_time).to eq("2013-06-14T09:00:00") 47 | end 48 | end 49 | end 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /spec/lib/pipely/build/environment_config_spec.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/build/environment_config' 2 | 3 | describe Pipely::Build::EnvironmentConfig do 4 | 5 | describe '.load(filename, environment)' do 6 | let(:filename) { 'path/to/config/yaml.yml' } 7 | 8 | let(:config) do 9 | YAML.load(<<-EOS) 10 | my_env: 11 | key: 'my_val' 12 | production: 13 | key: 'prod_val' 14 | staging: 15 | key: 'staging_val' 16 | EOS 17 | end 18 | 19 | before do 20 | allow(YAML).to receive(:load_file).with(filename) { config } 21 | end 22 | 23 | context 'given a custom environment' do 24 | subject { described_class.load(filename, 'my_env') } 25 | 26 | it 'loads config from a YAML file' do 27 | expect(subject[:key]).to eq('my_val') 28 | end 29 | end 30 | 31 | context 'given the "production" environment' do 32 | subject { described_class.load(filename, 'production') } 33 | 34 | it 'loads config from a YAML file' do 35 | expect(subject[:key]).to eq('prod_val') 36 | end 37 | 38 | it 'supports legacy defaults' do 39 | expect(subject[:s3_prefix]).to eq('production/:namespace') 40 | expect(subject[:scheduler]).to eq('daily') 41 | end 42 | end 43 | 44 | context 'given the "staging" environment' do 45 | subject { described_class.load(filename, 'staging') } 46 | 47 | it 'loads config from a YAML file' do 48 | expect(subject[:key]).to eq('staging_val') 49 | end 50 | 51 | it 'supports legacy defaults' do 52 | expect(subject[:s3_prefix]).to eq('staging/:whoami/:namespace') 53 | expect(subject[:scheduler]).to eq('now') 54 | end 55 | end 56 | end 57 | 58 | end 59 | -------------------------------------------------------------------------------- /spec/lib/pipely/build/right_now_scheduler_spec.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/build/right_now_scheduler' 2 | require 'timecop' 3 | describe Pipely::Build::RightNowScheduler do 4 | 5 | describe "#period" do 6 | it "is '1 year'" do 7 | expect(subject.period).to eq('1 year') 8 | end 9 | end 10 | 11 | describe "#start_date_time" do 12 | it "chooses the current time as the start time" do 13 | Timecop.freeze(Time.utc(2013, 6, 12, 16, 12, 30)) do 14 | expect(subject.start_date_time).to eq("2013-06-12T16:12:30") 15 | end 16
| end 17 | end 18 | 19 | end 20 | -------------------------------------------------------------------------------- /spec/lib/pipely/build/s3_path_builder_spec.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/build/s3_path_builder' 2 | 3 | describe Pipely::Build::S3PathBuilder do 4 | 5 | subject { 6 | described_class.new( 7 | logs: 'log-bucket', 8 | steps: 'step-bucket', 9 | assets: 'asset-bucket', 10 | prefix: 'run-prefix', 11 | ) 12 | } 13 | 14 | its(:s3_log_prefix) { 15 | should eq("s3://log-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}") 16 | } 17 | 18 | its(:s3_step_prefix) { 19 | should eq("s3://step-bucket/run-prefix") 20 | } 21 | 22 | its(:s3n_step_prefix) { 23 | should eq("s3n://step-bucket/run-prefix") 24 | } 25 | 26 | its(:s3_asset_prefix) { 27 | should eq("s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}") 28 | } 29 | 30 | its(:s3n_asset_prefix) { 31 | should eq("s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}") 32 | } 33 | 34 | its(:s3_shared_asset_prefix) { 35 | should eq("s3://asset-bucket/run-prefix/shared/\#{format(@scheduledStartTime,'YYYY-MM-dd')}") 36 | } 37 | 38 | its(:bucket_relative_s3_asset_prefix) { 39 | should eq("run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}") 40 | } 41 | 42 | describe "#to_hash" do 43 | it 'includes the necessary keys for supplying config to a Template' do 44 | expect(subject.to_hash.keys).to include( 45 | :s3_log_prefix, 46 | :s3_step_prefix, 47 | :s3n_step_prefix, 48 | :s3_asset_prefix, 49 | :s3n_asset_prefix, 50 | :s3_shared_asset_prefix, 51 | :bucket_relative_s3_asset_prefix, 52 | ) 53 | end 54 | end 55 | 56 | context "when a custom template is specified via config" do 57 | subject { 58 | described_class.new( 59 | foo: 'my-value', 60 | templates: { 61 | bar: ':protocol://my-bucket/:foo/okay' 62 | } 63 | ) 64 | } 65 | 66 | its(:s3_bar_prefix) { 67 | should eq('s3://my-bucket/my-value/okay') 68 | } 69 | 70 | its(:s3n_bar_prefix) { 71 | should eq('s3n://my-bucket/my-value/okay') 72 | } 73 | 74 | describe "#to_hash" do 75 | it 'includes the keys for the custom template' do 76 | expect(subject.to_hash.keys).to include( 77 | :s3_bar_prefix, 78 | :s3n_bar_prefix, 79 | ) 80 | end 81 | end 82 | end 83 | 84 | end 85 | -------------------------------------------------------------------------------- /spec/lib/pipely/build/template_spec.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/build/template' 2 | 3 | describe Pipely::Build::Template do 4 | let(:source) { "some test json <%= foo %>" } 5 | 6 | subject { described_class.new(source) } 7 | 8 | context 'given some configuration' do 9 | let(:foo) { 'asdfgwrytqfadfa' } 10 | let(:expected_json) { "some test json #{foo}" } 11 | 12 | before do 13 | subject.apply_config({ foo: foo }) 14 | end 15 | 16 | its(:to_json) { should eq(expected_json) } 17 | end 18 | 19 | describe "#streaming_hadoop_step(options)" do 20 | before do 21 | # emulate applying config from S3PathBuilder, as done in Definition#to_json 22 | subject.apply_config({ 23 | s3_log_prefix: "s3://log-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}", 24 | s3_step_prefix: "s3://step-bucket/run-prefix", 25 | s3n_step_prefix: "s3n://step-bucket/run-prefix", 26 | s3_asset_prefix: "s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}", 27 | s3n_asset_prefix: 
"s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}", 28 | s3_shared_asset_prefix: "s3://asset-bucket/run-prefix/shared/\#{format(@scheduledStartTime,'YYYY-MM-dd')}", 29 | bucket_relative_s3_asset_prefix: "run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}", 30 | }) 31 | end 32 | 33 | it "builds a streaming hadoop step" do 34 | step = subject.streaming_hadoop_step( 35 | :input => '/input_dir/', 36 | :output => '/output_dir/', 37 | :mapper => '/mapper.rb', 38 | :reducer => '/reducer.rb' 39 | ) 40 | 41 | expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,s3n://step-bucket/run-prefix/reducer.rb") 42 | end 43 | 44 | context "given an array of inputs" do 45 | it 'points to the IdentityReducer correctly (not as an S3 URL)' do 46 | step = subject.streaming_hadoop_step( 47 | :input => ['/input_dir/', '/input_dir2/'], 48 | :output => '/output_dir/', 49 | :mapper => '/mapper.rb', 50 | :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer' 51 | ) 52 | 53 | expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir2/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer") 54 | end 55 | end 56 | 57 | context "given a cacheFile" do 58 | it 'points to the IdentityReducer correctly (not as an S3 URL)' do 59 | step = subject.streaming_hadoop_step( 60 | :input => '/input_dir/', 61 | :output => '/output_dir/', 62 | :cache_file => '/cache_file#cache_file', 63 | :mapper => '/mapper.rb', 64 | :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer' 65 | ) 66 | 67 | expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer,-cacheFile,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/cache_file#cache_file") 68 | end 69 | end 70 | 71 | context "given an outputformat" do 72 | it 'points to the outputformat class (not as an S3 URL)' do 73 | step = subject.streaming_hadoop_step( 74 | :input => '/input_dir/', 75 | :output => '/output_dir/', 76 | :outputformat => 'com.swipely.foo.outputformat', 77 | :mapper => '/mapper.rb', 78 | :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer' 79 | ) 80 | 81 | expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-outputformat,com.swipely.foo.outputformat,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer") 82 | end 83 | end 84 | 85 | context "given the IdentityReducer" do 86 | it 'points to the 
IdentityReducer correctly (not as an S3 URL)' do 87 | step = subject.streaming_hadoop_step( 88 | :input => '/input_dir/', 89 | :output => '/output_dir/', 90 | :mapper => '/mapper.rb', 91 | :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer' 92 | ) 93 | 94 | expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer") 95 | end 96 | end 97 | 98 | context "given jar files" do 99 | it 'loads the file correctly' do 100 | step = subject.streaming_hadoop_step( 101 | :input => '/input_dir/', 102 | :output => '/output_dir/', 103 | :mapper => '/mapper.rb', 104 | :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer', 105 | :lib_jars => [ 'filter.jar', 'filter2.jar' ], 106 | ) 107 | 108 | expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-libjars,filter.jar,-libjars,filter2.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer") 109 | end 110 | end 111 | 112 | context "given variables" do 113 | it 'defines them correctly' do 114 | step = subject.streaming_hadoop_step( 115 | :input => '/input_dir/', 116 | :output => '/output_dir/', 117 | :mapper => '/mapper.rb', 118 | :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer', 119 | :defs => {'mapred.text.key.partitioner.options' => '-k1,1'} 120 | ) 121 | 122 | expect(step).to eq('/home/hadoop/contrib/streaming/hadoop-streaming.jar,-D,mapred.text.key.partitioner.options=-k1\\,1,-input,s3n://asset-bucket/run-prefix/#{format(@scheduledStartTime,\'YYYY-MM-dd_HHmmss\')}/input_dir/,-output,s3://asset-bucket/run-prefix/#{format(@scheduledStartTime,\'YYYY-MM-dd_HHmmss\')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer') 123 | end 124 | end 125 | end 126 | 127 | end 128 | -------------------------------------------------------------------------------- /spec/lib/pipely/build_spec.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/build' 2 | 3 | describe Pipely::Build do 4 | 5 | describe '.build_definition(template, environment, config_path)' do 6 | 7 | let(:template) { double } 8 | let(:environment) { 'production' } 9 | let(:config_path) { 'path/to/config' } 10 | 11 | let(:config) { double } 12 | 13 | before do 14 | allow(Pipely::Build::EnvironmentConfig).to receive(:load). 15 | with(config_path, environment.to_sym). 
16 | and_return(config) 17 | end 18 | 19 | it 'builds a Definition' do 20 | expect( 21 | described_class.build_definition(template, environment, config_path) 22 | ).to eq( 23 | Pipely::Build::Definition.new( 24 | template, 25 | environment.to_sym, 26 | config 27 | ) 28 | ) 29 | end 30 | end 31 | 32 | end 33 | -------------------------------------------------------------------------------- /spec/lib/pipely/bundler/bundle_spec.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/bundler/bundle' 2 | 3 | describe Pipely::Bundler::Bundle do 4 | 5 | describe ".build" do 6 | let(:groups) { [ :group1 ] } 7 | let(:definition) { double "Bundler::Definition" } 8 | let(:spec_set) { double } 9 | 10 | before do 11 | definition.stub(:specs_for).with(groups) { spec_set } 12 | end 13 | 14 | it 'builds a Bundle instance with the spec_set' do 15 | bundle = described_class.build('vendor/test', groups, definition) 16 | expect(bundle.spec_set).to eq(spec_set) 17 | end 18 | end 19 | 20 | let(:pipely_spec) { double("Gem::Specification", name: "pipely") } 21 | let(:gem1_spec) { double("Gem::Specification", name: "gem1") } 22 | let(:gem2_spec) { double("Gem::Specification", name: "gem2") } 23 | 24 | let(:pipely_source) do 25 | Bundler::Source::Path.new('name' => "pipely", 'path' => '.') 26 | end 27 | 28 | let(:spec_set) { [ pipely_spec, gem1_spec, gem2_spec ] } 29 | let(:locked_sources) { [ pipely_source ] } 30 | 31 | subject { described_class.new('vendor/test', spec_set, locked_sources) } 32 | 33 | describe "#gem_files" do 34 | let(:gem_packager) { double } 35 | 36 | before do 37 | gem_packager.stub(:package).and_return do |spec| 38 | { spec.name => '/path/to/cache/file.gem' } 39 | end 40 | 41 | gem_packager.stub(:build_from_source).and_return do |name, path| 42 | { name => "#{path}/#{name}-X.Y.Z.gem" } 43 | end 44 | end 45 | 46 | it "returns a cache file for each gem" do 47 | gem_files = subject.gem_files(gem_packager: gem_packager) 48 | expect(gem_files.keys).to match_array(%w[ gem1 gem2 pipely ]) 49 | end 50 | 51 | it "filters out gems to exclude" do 52 | gem_files = subject.gem_files(gem_packager: gem_packager, 53 | gems_to_exclude: ['gem2']) 54 | expect(gem_files.keys).to match_array(%w[ gem1 pipely ]) 55 | end 56 | 57 | context "given a packaged/non-locked gem" do 58 | it "returns the gems and their existing cache files" do 59 | expect(gem_packager).to receive(:package).with(gem1_spec) 60 | expect(gem_packager).to receive(:package).with(gem2_spec) 61 | 62 | subject.gem_files(gem_packager: gem_packager) 63 | end 64 | end 65 | 66 | context "given a locked-source gem" do 67 | it "should build new cache files from source" do 68 | expect(gem_packager).to receive(:build_from_source).with( 69 | pipely_source.name, 70 | pipely_source.path 71 | ) 72 | 73 | subject.gem_files(gem_packager: gem_packager) 74 | end 75 | end 76 | end 77 | 78 | end 79 | -------------------------------------------------------------------------------- /spec/lib/pipely/bundler/gem_packager_spec.rb: -------------------------------------------------------------------------------- 1 | require 'fileutils' 2 | require 'pipely/bundler/gem_packager' 3 | 4 | describe Pipely::Bundler::GemPackager do 5 | 6 | subject { described_class.new(vendor_path) } 7 | let(:vendor_path) { 'vendor/test' } 8 | 9 | before(:each) { 10 | unless Dir.exists? 
vendor_path 11 | FileUtils.mkdir_p 'vendor/test' 12 | end 13 | } 14 | 15 | describe "#package" do 16 | let(:gem_spec) do 17 | double("spec", 18 | name: 'test', 19 | cache_file: 'a/cache/file', 20 | gem_dir:'a/gem/dir', 21 | version:'0.0.1' 22 | ) 23 | end 24 | 25 | let(:vendored_gem) { "vendor/test/file" } 26 | 27 | context "with a cache file" do 28 | before do 29 | allow(File).to receive(:exists?).with(vendored_gem) { false } 30 | allow(File).to receive(:exists?).with(gem_spec.cache_file) { true } 31 | allow(FileUtils).to receive(:cp).with( 32 | gem_spec.cache_file, vendored_gem) 33 | end 34 | 35 | it "returns the cache file" do 36 | expect(subject.package(gem_spec)).to eq( 37 | {gem_spec.name => vendored_gem} 38 | ) 39 | end 40 | end 41 | 42 | context "without a cache file" do 43 | before do 44 | allow(File).to receive(:exists?).with(gem_spec.cache_file) { false } 45 | allow(File).to receive(:exists?).with(vendored_gem) { false } 46 | end 47 | 48 | context "if source is available" do 49 | before do 50 | allow(File).to receive(:directory?).with(gem_spec.gem_dir) { true } 51 | end 52 | 53 | it "builds the gem from source" do 54 | expect(subject).to receive(:build_from_source).and_return( 55 | {"test"=>"a/packaged/file"}) 56 | 57 | expect(subject.package(gem_spec)).to eq({"test"=>"a/packaged/file"}) 58 | end 59 | end 60 | 61 | context "if source not available, e.g. json-1.8.1 built into Ruby 2.1" do 62 | before do 63 | allow(File).to receive(:directory?).with(gem_spec.gem_dir) { false } 64 | end 65 | 66 | it "downloads from rubygems" do 67 | remote_fetcher = double(:remote_fetcher) 68 | expect(Gem::RemoteFetcher).to receive(:new).and_return(remote_fetcher) 69 | expect(remote_fetcher).to receive(:fetch_path). 70 | with("https://rubygems.org/downloads/test-0.0.1.gem") 71 | expect(subject.package(gem_spec)).to eq( 72 | {"test"=>"vendor/test/test-0.0.1.gem"}) 73 | end 74 | end 75 | 76 | end 77 | end 78 | 79 | describe "#build_from_source" do 80 | context "with bad spec" do 81 | it "raises" do 82 | expect { subject.build_from_source("bad-name", ".") }.to raise_error 83 | end 84 | end 85 | end 86 | 87 | end 88 | -------------------------------------------------------------------------------- /spec/lib/pipely/bundler/project_gem_spec.rb: -------------------------------------------------------------------------------- 1 | # Copyright Swipely, Inc. All rights reserved. 
2 | 3 | require 'spec_helper' 4 | require 'pipely/bundler/project_gem' 5 | 6 | describe Pipely::Bundler::ProjectGem do 7 | 8 | let(:project_spec) do 9 | double "Gem::Specification", 10 | name: "my-project", 11 | file_name: "/path/to/cache/my-project.gem" 12 | end 13 | 14 | subject { described_class.new(project_spec, 'vendor/test') } 15 | 16 | describe ".load" do 17 | let(:filename) { 'foo.gemspec' } 18 | let(:gemspec) { double } 19 | 20 | before do 21 | allow(Dir).to receive(:glob).with("*.gemspec") { [ filename ] } 22 | allow(Gem::Specification).to receive(:load).with(filename) { gemspec } 23 | end 24 | 25 | it "loads the gemspec" do 26 | loaded = described_class.load('vendor/test') 27 | expect(loaded.project_spec).to eq(gemspec) 28 | end 29 | end 30 | 31 | describe "#gem_files" do 32 | let(:dependency_gem_files) do 33 | { 34 | 'packaged-gem1' => '/path/to/cache/packaged-gem1.gem', 35 | 'built-from-source-gem1' => '/path/to/cache/built-from-source-gem1.gem', 36 | } 37 | end 38 | 39 | let(:project_gem_file) do 40 | { 41 | project_spec.name => project_spec.file_name 42 | } 43 | end 44 | 45 | before do 46 | allow(subject).to receive(:dependency_gem_files) { dependency_gem_files } 47 | allow(subject).to receive(:project_gem_file) { project_gem_file } 48 | end 49 | 50 | it "combines the dependency_gem_files and the project_gem_file" do 51 | expect(subject.gem_files.keys).to match_array( 52 | dependency_gem_files.keys + project_gem_file.keys 53 | ) 54 | end 55 | 56 | it "lists the project_gem_file last" do 57 | expect(subject.gem_files.keys.last).to eq(project_spec.name) 58 | end 59 | end 60 | 61 | describe "#dependency_gem_files" do 62 | let(:bundle) { double "Pipely::Bundler::Bundle" } 63 | let(:excludes) { { gems_to_exclude: [project_spec.name, 'bundler'] } } 64 | let(:filtered_gem_files) do 65 | { 66 | 'packaged-gem1' => '/path/to/cache/packaged-gem1.gem', 67 | 'built-from-source-gem1' => '/path/to/cache/built-from-source-gem1.gem', 68 | 'bundler' => '/path/to/cache/bundler.gem', 69 | project_spec.name => project_spec.file_name, 70 | } 71 | end 72 | 73 | it "should filter out the bundler gem and the project gem" do 74 | expect(bundle).to receive(:gem_files).with(excludes) {filtered_gem_files} 75 | expect(subject.dependency_gem_files(bundle)).to be(filtered_gem_files) 76 | end 77 | end 78 | 79 | describe "#project_gem_file" do 80 | let(:gem_packager) { double "Pipely::Bundler::GemPackager" } 81 | let(:project_gem_file) { double } 82 | 83 | before do 84 | allow(gem_packager).to receive(:build_from_source) { project_gem_file } 85 | end 86 | 87 | it "should return the project's own gem file" do 88 | result = subject.project_gem_file(gem_packager) 89 | expect(result).to eq(project_gem_file) 90 | end 91 | end 92 | 93 | end 94 | -------------------------------------------------------------------------------- /spec/lib/pipely/component_spec.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/component' 2 | require 'pipely/dependency' 3 | 4 | describe Pipely::Component do 5 | 6 | subject { 7 | described_class.new( 8 | id: 'my-component', 9 | type: 'OreoSalad', 10 | dependsOn: {'ref' => 'asdf'}, 11 | input: {'ref' => 'infile'}, 12 | output: {'ref' => 'outfile'}, 13 | color: 'yellow', 14 | execution_state: 'WAITING_FOR_RUNNER', 15 | ) 16 | } 17 | 18 | it 'coerces dependsOn into a ReferenceList' do 19 | expect(subject.dependsOn).to be_a(Pipely::ReferenceList) 20 | end 21 | 22 | describe '#graphviz_options' do 23 | it 'builds properties for graphviz node
representing this component' do 24 | expect(subject.graphviz_options).to eq({ 25 | :shape => 'record', 26 | :label => '{my-component|OreoSalad|WAITING_FOR_RUNNER}', 27 | :color => 'yellow', 28 | :fillcolor => 'bisque4', 29 | :style => 'filled', 30 | }) 31 | end 32 | end 33 | 34 | describe '#dependencies' do 35 | it 'includes dependsOn edges' do 36 | expect(subject.dependencies).to eq([ 37 | Pipely::Dependency.new('dependsOn', 'asdf'), 38 | Pipely::Dependency.new('input', 'infile'), 39 | Pipely::Dependency.new('output', 'outfile'), 40 | ]) 41 | end 42 | end 43 | 44 | end 45 | -------------------------------------------------------------------------------- /spec/lib/pipely/definition_spec.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/definition' 2 | 3 | describe Pipely::Definition do 4 | 5 | subject { described_class.parse(definition_json) } 6 | 7 | let(:definition_json) { 8 | < { color: 'pink' }, 48 | }) 49 | 50 | pink_node = subject.components.detect{|n| n.id == 'DoStuff'} 51 | 52 | expect(pink_node.color).to eq('pink') 53 | end 54 | end 55 | 56 | end 57 | -------------------------------------------------------------------------------- /spec/lib/pipely/dependency_spec.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/dependency' 2 | 3 | describe Pipely::Dependency do 4 | 5 | describe '#color' do 6 | it 'defaults to "black"' do 7 | expect(subject.color).to eq('black') 8 | end 9 | end 10 | 11 | end 12 | -------------------------------------------------------------------------------- /spec/lib/pipely/deploy/bootstrap_context_spec.rb: -------------------------------------------------------------------------------- 1 | # Copyright Swipely, Inc. All rights reserved. 
2 | 3 | require 'spec_helper' 4 | require 'pipely/deploy/bootstrap_context' 5 | 6 | describe Pipely::Deploy::BootstrapContext do 7 | subject do 8 | Pipely::Deploy::BootstrapContext.new.tap do |context| 9 | context.gem_files = ['one.gem', 'two.gem'] 10 | end 11 | end 12 | 13 | let(:aws_install_gems_script) do 14 | " 15 | # one.gem 16 | aws s3 cp one.gem one.gem 17 | gem install --force --local one.gem --no-ri --no-rdoc 18 | 19 | # two.gem 20 | aws s3 cp two.gem two.gem 21 | gem install --force --local two.gem --no-ri --no-rdoc 22 | " 23 | end 24 | 25 | let(:hadoop_install_gems_script) do 26 | " 27 | # one.gem 28 | hadoop fs -copyToLocal one.gem one.gem 29 | gem install --force --local one.gem --no-ri --no-rdoc 30 | 31 | # two.gem 32 | hadoop fs -copyToLocal two.gem two.gem 33 | gem install --force --local two.gem --no-ri --no-rdoc 34 | " 35 | end 36 | 37 | describe "#install_gems_script" do 38 | it "with hadoop fs" do 39 | expect(subject.install_gems_script(:hadoop_fs)).to eql( 40 | hadoop_install_gems_script) 41 | end 42 | 43 | context "with aws cli" do 44 | it "should build script for aws cli" do 45 | expect(subject.install_gems_script(:awscli) ).to eql( 46 | aws_install_gems_script) 47 | end 48 | end 49 | 50 | context "with yield" do 51 | it "should build script for aws cli" do 52 | expect(subject.install_gems_script(:awscli) do |command,file,filename| 53 | "custom command - #{file} #{filename} #{command}" 54 | end).to eql " 55 | # one.gem 56 | custom command - one.gem one.gem aws s3 cp 57 | gem install --force --local one.gem --no-ri --no-rdoc 58 | 59 | # two.gem 60 | custom command - two.gem two.gem aws s3 cp 61 | gem install --force --local two.gem --no-ri --no-rdoc 62 | " 63 | end 64 | end 65 | 66 | context "using the emr context" do 67 | describe "#install_gems_script" do 68 | it "build script using hadoop fs" do 69 | expect(subject.install_gems_script(:hadoop_fs)).to eql " 70 | # one.gem 71 | hadoop fs -copyToLocal one.gem one.gem 72 | gem install --force --local one.gem --no-ri --no-rdoc 73 | 74 | # two.gem 75 | hadoop fs -copyToLocal two.gem two.gem 76 | gem install --force --local two.gem --no-ri --no-rdoc 77 | " 78 | end 79 | end 80 | end 81 | 82 | context "using the emr context" do 83 | let(:emr) { subject.emr } 84 | 85 | describe '#install_gems_script' do 86 | it 'should be same as parent hadoop install script' do 87 | expect(emr.install_gems_script).to eq(hadoop_install_gems_script) 88 | end 89 | end 90 | end 91 | 92 | context "using the ec2 context" do 93 | let(:ec2) { subject.ec2 } 94 | 95 | describe '#install_gems_script' do 96 | it 'should be same as parent aws install script' do 97 | expect(ec2.install_gems_script).to eq(aws_install_gems_script) 98 | end 99 | end 100 | 101 | describe "#as_root" do 102 | 103 | context "on first run" do 104 | it "should build script with ssh init" do 105 | expect(ec2.as_root { "Custom Script here" }).to eql " 106 | # Set up ssh access 107 | if [ ! 
-f ~/.ssh/id_rsa ]; then 108 | mkdir -p ~/.ssh 109 | ssh-keygen -P '' -f ~/.ssh/id_rsa 110 | cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys 111 | chmod 600 ~/.ssh/authorized_keys 112 | fi 113 | 114 | # Use ssh to bypass the sudo \"require tty\" setting 115 | ssh -o \"StrictHostKeyChecking no\" -t -t ec2-user@localhost <<- EOF 116 | sudo su -; 117 | Custom Script here 118 | # exit twice, once for su and once for ssh 119 | exit; 120 | exit; 121 | EOF 122 | " 123 | end 124 | end 125 | 126 | context "on consecutive runs" do 127 | it "should build script" do 128 | ec2.as_root { "First run" } 129 | 130 | expect(ec2.as_root { "Second run" }).to eql " 131 | # Use ssh to bypass the sudo \"require tty\" setting 132 | ssh -o \"StrictHostKeyChecking no\" -t -t ec2-user@localhost <<- EOF 133 | sudo su -; 134 | Second run 135 | # exit twice, once for su and once for ssh 136 | exit; 137 | exit; 138 | EOF 139 | " 140 | end 141 | end 142 | end 143 | end 144 | end 145 | end 146 | -------------------------------------------------------------------------------- /spec/lib/pipely/deploy/bootstrap_registry_spec.rb: -------------------------------------------------------------------------------- 1 | # Copyright Swipely, Inc. All rights reserved. 2 | 3 | require 'spec_helper' 4 | require 'pipely/deploy/bootstrap_registry' 5 | 6 | describe Pipely::Deploy::BootstrapRegistry do 7 | 8 | subject { described_class } 9 | 10 | describe "#mixins" do 11 | it "should default to empty" do 12 | expect(subject.mixins).to be_empty 13 | end 14 | end 15 | 16 | describe "#register_mixins" do 17 | context "with a mixin" do 18 | let(:mixin) { "Fixtures::BootstrapContexts::Green" } 19 | let(:result) { [mixin] } 20 | it "should register the mixin" do 21 | expect(subject.register_mixins(mixin)).to eql(result) 22 | expect(subject.mixins).to eql(result) 23 | end 24 | end 25 | 26 | context "when a mixin cannot be required" do 27 | it "should raise" do 28 | expect { subject.register_mixins('bad::mixin') }.to raise_error 29 | end 30 | end 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /spec/lib/pipely/deploy/bootstrap_spec.rb: -------------------------------------------------------------------------------- 1 | # Copyright Swipely, Inc. All rights reserved.
2 | 3 | require 'spec_helper' 4 | require 'pipely/deploy/bootstrap' 5 | require 'pipely/deploy/bootstrap_registry' 6 | require 'fileutils' 7 | require 'fixtures/bootstrap_contexts/simple' 8 | require 'fixtures/bootstrap_contexts/green' 9 | 10 | describe Pipely::Deploy::Bootstrap do 11 | 12 | subject { described_class.new(gem_files, s3_steps_path) } 13 | let(:s3_steps_path) { 'a/test/path' } 14 | let(:gem_files) do 15 | { 16 | 'packaged-gem1' => '/path/to/cache/packaged-gem1.gem', 17 | 'built-from-source-gem1' => '/path/to/cache/built-from-source-gem1.gem', 18 | } 19 | end 20 | 21 | describe "#context" do 22 | context "without any mixins" do 23 | let(:context) { subject.context } 24 | 25 | it "should have s3 steps path" do 26 | expect(context.s3_steps_path).to eq(s3_steps_path) 27 | end 28 | 29 | it "builds S3 urls to the uploaded gem files" do 30 | expect(context.gem_files).to eq(gem_files) 31 | end 32 | end 33 | 34 | context "with one mixin" do 35 | let(:context) { subject.context( mixin.name ) } 36 | let(:mixin) { Fixtures::BootstrapContexts::Simple } 37 | 38 | it "should have Simple mixin method" do 39 | expect(context.simple).to eq("simple") 40 | end 41 | end 42 | 43 | context "with multiple mixins" do 44 | let(:context) { subject.context( mixins.map(&:name) ) } 45 | let(:mixins) do 46 | [Fixtures::BootstrapContexts::Simple,Fixtures::BootstrapContexts::Green] 47 | end 48 | 49 | it "should have simple mixin method" do 50 | expect(context.simple).to eq("simple") 51 | end 52 | 53 | it "should have green mixin method" do 54 | expect(context.green).to eq("green") 55 | end 56 | end 57 | 58 | context "with mixin from BootstrapRegistry" do 59 | let(:context) { subject.context } 60 | before do 61 | Pipely::Deploy::BootstrapRegistry.instance.register_mixins( 62 | "Fixtures::BootstrapContexts::Simple") 63 | end 64 | 65 | it "should have simple mixin method" do 66 | expect(context.simple).to eq("simple") 67 | end 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /spec/lib/pipely/deploy/client_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'pipely/deploy' 3 | 4 | describe Pipely::Deploy::Client do 5 | 6 | describe "#deploy_pipeline" do 7 | let(:existing_pipeline_ids) { ["pipeline-one", "pipeline-two"] } 8 | let(:new_pipeline_id) { "pipeline-three" } 9 | let(:pipeline_basename) { "MyPipeline" } 10 | let(:definition) { "pipeline json" } 11 | 12 | it "gets a list of pipelines, creates a new one, and deletes the others" do 13 | subject.should_receive(:existing_pipelines). 14 | and_return(existing_pipeline_ids) 15 | 16 | subject.should_receive(:create_pipeline). 17 | with("#{ENV['USER']}:#{pipeline_basename}", 18 | nil, 19 | hash_including( 'basename' => pipeline_basename ) 20 | ).
21 | and_return(new_pipeline_id) 22 | 23 | existing_pipeline_ids.each do |id| 24 | subject.should_receive(:delete_pipeline).with(id) 25 | end 26 | 27 | subject.deploy_pipeline(pipeline_basename) { definition } 28 | end 29 | end 30 | 31 | describe '#create_pipeline' do 32 | let(:pipeline_name) { 'NewPipeline' } 33 | let(:pipeline_id) { 123 } 34 | let(:created_pipeline) do 35 | double(:created_pipeline, pipeline_id: pipeline_id) 36 | end 37 | let(:definition) { "Pipeline ID: 123" } 38 | 39 | let(:aws) { subject.instance_variable_get(:@aws) } 40 | 41 | it 'gets the definition from the block' do 42 | 43 | Pipely::Deploy::JSONDefinition.should_receive(:parse).with(definition) 44 | 45 | aws.should_receive(:create_pipeline).and_return(created_pipeline) 46 | aws.should_receive(:put_pipeline_definition).and_return({}) 47 | aws.should_receive(:activate_pipeline) 48 | subject.create_pipeline(pipeline_name, nil) do |pipeline_id| 49 | "Pipeline ID: #{pipeline_id}" 50 | end 51 | end 52 | end 53 | 54 | end 55 | -------------------------------------------------------------------------------- /spec/lib/pipely/deploy/s3_uploader_spec.rb: -------------------------------------------------------------------------------- 1 | # Copyright Swipely, Inc. All rights reserved. 2 | 3 | require 'spec_helper' 4 | require 'pipely/deploy/s3_uploader' 5 | require 'tempfile' 6 | 7 | describe Pipely::Deploy::S3Uploader do 8 | 9 | subject { described_class.new(s3_bucket, 'test_path/gems') } 10 | 11 | let(:s3_object) { double(:s3_object) } 12 | let(:bucket_name) { 'a-test-bucket' } 13 | let(:s3_bucket) { double(:bucket, object: s3_object, name: bucket_name) } 14 | 15 | it "should have bucket name" do 16 | expect(subject.bucket_name).to eq('a-test-bucket') 17 | end 18 | 19 | it "should have a s3 path" do 20 | expect(subject.s3_path).to eq('test_path/gems') 21 | end 22 | 23 | describe "#upload(files)" do 24 | let(:files) do 25 | [ 26 | Tempfile.new('packaged-gem1.gem').path, 27 | Tempfile.new('built-from-source-gem1.gem').path, 28 | ] 29 | end 30 | 31 | it 'uploads each file' do 32 | allow(s3_object).to receive(:exists?).and_return(true) 33 | allow(s3_object).to receive(:etag).and_return('mismatch') 34 | files.each do |file| 35 | expect(s3_bucket).to receive(:object).with(subject.s3_file_path(file)) 36 | end 37 | 38 | expect(s3_object).to receive(:put).exactly(files.size).times 39 | 40 | subject.upload(files) 41 | end 42 | end 43 | 44 | end 45 | -------------------------------------------------------------------------------- /spec/lib/pipely/graph_builder_spec.rb: -------------------------------------------------------------------------------- 1 | require 'pipely/graph_builder' 2 | 3 | describe Pipely::GraphBuilder do 4 | 5 | let(:graph) { double(:graph) } 6 | 7 | let(:node1) { 8 | Pipely::Component.new( 9 | :id => '1', 10 | :dependsOn => { 'ref' => '2' }, 11 | ) 12 | } 13 | 14 | let(:node2) { 15 | Pipely::Component.new( 16 | :id => '2', 17 | ) 18 | } 19 | 20 | subject { described_class.new(graph) } 21 | 22 | describe '#build' do 23 | it 'builds a graph from a list of Components' do 24 | graph.should_receive(:add_nodes). 25 | with(node1.id, node1.graphviz_options).ordered 26 | 27 | graph.should_receive(:add_nodes). 28 | with(node2.id, node2.graphviz_options).ordered 29 | 30 | graph.should_receive(:add_edges). 
-------------------------------------------------------------------------------- /spec/lib/pipely/graph_builder_spec.rb: --------------------------------------------------------------------------------
require 'pipely/graph_builder'

describe Pipely::GraphBuilder do

  let(:graph) { double(:graph) }

  let(:node1) {
    Pipely::Component.new(
      :id => '1',
      :dependsOn => { 'ref' => '2' },
    )
  }

  let(:node2) {
    Pipely::Component.new(
      :id => '2',
    )
  }

  subject { described_class.new(graph) }

  describe '#build' do
    it 'builds a graph from a list of Components' do
      graph.should_receive(:add_nodes).
        with(node1.id, node1.graphviz_options).ordered

      graph.should_receive(:add_nodes).
        with(node2.id, node2.graphviz_options).ordered

      graph.should_receive(:add_edges).
        with(
          node1.id,
          node2.id,
          :label => 'dependsOn',
          :color => 'black',
        ).ordered

      subject.build([node1, node2])
    end
  end

end
-------------------------------------------------------------------------------- /spec/lib/pipely/pipeline_date_time/pipeline_date_pattern_spec.rb: --------------------------------------------------------------------------------
# encoding: utf-8
require 'pipely/pipeline_date_time/pipeline_date_pattern'

TestSelection = Struct.new(:num_days_back, :target_date, :target_all_time)

class TestDatePatternMatcher
  attr_accessor :day_offsets, :month_offsets, :year_offsets

  PipelineDate = Pipely::PipelineDateTime::PipelineDate

  def initialize(date_pattern, target_date, sep)
    @day_offsets, @month_offsets, @year_offsets = [], [], []
    date_pattern.split(sep).each do |part|
      days, format = days_and_format(part, target_date)
      case format
      when PipelineDate::DEFAULT_YEAR_FORMAT then @year_offsets << days
      when PipelineDate::DEFAULT_MONTH_FORMAT then @month_offsets << days
      when PipelineDate::DEFAULT_DAY_FORMAT then @day_offsets << days
      end
    end
  end

  private

  def days_and_format(part, target_date)
    trimmed = part.gsub("\#{format(", '').gsub(/\"\)}.*/, '')
    days_expr, format = trimmed.split(", \"")
    if days_expr == target_date
      days = 0
    else
      days = days_expr.gsub("minusDays(#{target_date}, ", '').gsub(')', '')
    end
    return days.to_i, format
  end
end
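
# For reference, each entry in the date pattern parsed above is an AWS Data
# Pipeline expression such as (illustrative offset):
#
#   #{format(minusDays(@scheduledStartTime, 5), "YYYY/MM/dd")}
#
# i.e. "five days before the scheduled start, formatted as a day path".
# TestDatePatternMatcher inverts that: it recovers the day offset (5 here)
# and buckets it by whichever format string was used (day, month, or year).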

class TestPipelineDatePattern
  include Pipely::PipelineDateTime::PipelineDatePattern

  attr_reader :selection

  def initialize
    @selection = TestSelection.new
    @selection.target_all_time = false
  end

  def num_days_back=(num_days_back)
    @selection.num_days_back = num_days_back
  end

  def target_date=(target_date)
    @selection.target_date = target_date
  end

  def any_string(parts)
    if parts.empty?
      nil
    elsif parts.count == 1
      parts.first
    else
      parts.join('|')
    end
  end
end

describe TestPipelineDatePattern do
  let(:target_date) { '@scheduledStartTime' }
  let(:sep) { '|' }
  subject { described_class.new }

  before { subject.target_date = target_date }

  context 'with 0 days back' do
    before { subject.num_days_back = 0 }

    describe '#date_pattern' do
      let(:pattern_matcher) do
        TestDatePatternMatcher.new(subject.date_pattern, target_date, sep)
      end

      it 'contains just target_date' do
        expect(pattern_matcher.day_offsets).to eq([0])
        expect(pattern_matcher.month_offsets).to eq([])
        expect(pattern_matcher.year_offsets).to eq([])
      end
    end
  end

  context 'with 59 days back' do
    before { subject.num_days_back = 59 }

    describe '#date_pattern' do
      let(:pattern_matcher) do
        TestDatePatternMatcher.new(subject.date_pattern, target_date, sep)
      end

      it 'contains 60 individual days' do
        expect(pattern_matcher.day_offsets.sort).to eq((0..59).to_a)
      end

      it 'contains no months' do
        expect(pattern_matcher.month_offsets).to eq([])
      end

      it 'contains no years' do
        expect(pattern_matcher.year_offsets).to eq([])
      end
    end
  end

  context 'with 60 days back' do
    before { subject.num_days_back = 60 }

    describe '#date_pattern' do
      let(:pattern_matcher) do
        TestDatePatternMatcher.new(subject.date_pattern, target_date, sep)
      end

      it 'contains 60 individual days' do
        expected_days = (0..29).to_a + (31..60).to_a
        expect(pattern_matcher.day_offsets.sort).to eq(expected_days)
      end

      it 'contains 1 month' do
        expect(pattern_matcher.month_offsets).to eq([30])
      end

      it 'contains no years' do
        expect(pattern_matcher.year_offsets).to eq([])
      end
    end
  end

  context 'with 729 days back' do
    before { subject.num_days_back = 729 }

    describe '#date_pattern' do
      let(:pattern_matcher) do
        TestDatePatternMatcher.new(subject.date_pattern, target_date, sep)
      end

      it 'contains 60 individual days' do
        expected_days = (0..29).to_a + (700..729).to_a
        expect(pattern_matcher.day_offsets.sort).to eq(expected_days)
      end

      it 'contains 24 individual months' do
        expected_months = ((30..674).step(28)).to_a
        expect(pattern_matcher.month_offsets.sort).to eq(expected_months)
      end

      it 'contains no years' do
        expect(pattern_matcher.year_offsets).to eq([])
      end
    end
  end

  context 'with 730 days back' do
    before { subject.num_days_back = 730 }

    describe '#date_pattern' do
      let(:pattern_matcher) do
        TestDatePatternMatcher.new(subject.date_pattern, target_date, sep)
      end

      it 'contains 60 individual days' do
        expected_days = (0..29).to_a + (701..730).to_a
        expect(pattern_matcher.day_offsets.sort).to eq(expected_days)
      end

      it 'contains 24 individual months' do
        expected_months = (30..394).step(28).to_a + (422..674).step(28).to_a
        expect(pattern_matcher.month_offsets.sort).to eq(expected_months)
      end

      it 'contains 1 year' do
        expect(pattern_matcher.year_offsets).to eq([365])
      end
    end
  end
end
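
Taken together, these contexts pin down a tiling scheme: the most recent 30 day-offsets (and the oldest 30 in range) are matched day-by-day, 28-day month patterns cover the middle, and only spans beyond roughly two years fall back to year patterns. A worked restatement of the '60 days back' and '730 days back' expectations:

# Worked example, restating the expectations above in plain Ruby.
# num_days_back = 60:
days   = (0..29).to_a + (31..60).to_a   # 60 individual day offsets
months = [30]                           # a single month pattern fills the gap
# num_days_back = 730:
months = (30..394).step(28).to_a + (422..674).step(28).to_a  # 24 months
years  = [365]                          # one year pattern covers the overflow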
-------------------------------------------------------------------------------- /spec/lib/pipely/pipeline_date_time/pipeline_date_range_base_spec.rb: --------------------------------------------------------------------------------
# encoding: utf-8
require 'set'
require 'pipely/pipeline_date_time/pipeline_date_range_base'

describe Pipely::PipelineDateTime::PipelineDateRangeBase do
  let(:target_date) { '@scheduledStartTime' }
  let(:days_back_start) { 5 }
  let(:days_back_end) { 0 }
  subject { described_class.new(target_date, days_back_start, days_back_end) }

  let(:expected_days_back) { (days_back_end..days_back_start).to_set }

  describe '#days_back' do
    it 'returns the expected value' do
      expect(subject.days_back).to eq expected_days_back
    end
  end

  describe '#exclude' do
    it 'does not exclude when days_back_start is negative' do
      subject.exclude(-1, 0)
      expect(subject.days_back).to eq expected_days_back
    end

    it 'does not exclude when days_back_end is negative' do
      subject.exclude(0, -2)
      expect(subject.days_back).to eq expected_days_back
    end

    it 'does not exclude when days_back_start is smaller than days_back_end' do
      subject.exclude(3, 5)
      expect(subject.days_back).to eq expected_days_back
    end

    it 'excludes expected offsets' do
      subject.exclude(4, 2)
      expect(subject.days_back).to eq Set.new([0, 1, 5])
    end
  end
end
-------------------------------------------------------------------------------- /spec/lib/pipely/pipeline_date_time/pipeline_date_spec.rb: --------------------------------------------------------------------------------
# encoding: utf-8
require 'pipely/pipeline_date_time/pipeline_date'

describe Pipely::PipelineDateTime::PipelineDate do
  let(:target_date) { '@scheduledStartTime' }

  context 'with default time formats' do
    context 'with positive num days back' do
      let(:num_days_back) { 5 }
      subject { described_class.new(target_date, num_days_back) }

      describe '#day' do
        let(:result) do
          "\#{format(minusDays(@scheduledStartTime, 5), \"YYYY/MM/dd\")}"
        end

        it { expect(subject.day).to eq(result) }
      end

      describe '#month' do
        let(:result) do
          "\#{format(minusDays(@scheduledStartTime, 5), \"YYYY/MM\")}/[0-9]+"
        end

        it { expect(subject.month).to eq(result) }
      end

      describe '#year' do
        let(:result) do
          "\#{format(minusDays(@scheduledStartTime, 5), "\
          "\"YYYY\")}/[0-9]+/[0-9]+"
        end

        it { expect(subject.year).to eq(result) }
      end
    end

    context 'with 0 days back' do
      let(:num_days_back) { 0 }
      subject { described_class.new(target_date, num_days_back) }

      describe '#day' do
        let(:result) do
          "\#{format(@scheduledStartTime, \"YYYY/MM/dd\")}"
        end

        it { expect(subject.day).to eq(result) }
      end
    end

    context 'with negative num days back' do
      let(:num_days_back) { -3 }
      subject { described_class.new(target_date, num_days_back) }

      describe '#day' do
        let(:result) do
          "\#{format(plusDays(@scheduledStartTime, 3), \"YYYY/MM/dd\")}"
        end

        it { expect(subject.day).to eq(result) }
      end
    end
  end
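
  # The strings pinned down above are AWS Data Pipeline expressions evaluated
  # at run time. The rule these examples imply (inferred from the
  # expectations, not the class's literal source):
  #
  #   minusDays(target, n)   when num_days_back is positive
  #   target                 when num_days_back is zero
  #   plusDays(target, |n|)  when num_days_back is negative
  #
  # wrapped in #{format(..., "<format>")}, with month and year patterns
  # suffixed by "/[0-9]+" globs for the finer-grained path segments.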

  context 'with custom date time formats' do
    let(:day_format) { 'DAY_FORMAT' }
    let(:month_format) { 'MONTH_FORMAT' }
    let(:year_format) { 'YEAR_FORMAT' }

    before do
      described_class.day_format = day_format
      described_class.month_format = month_format
      described_class.year_format = year_format
    end

    after do
      described_class.day_format = described_class::DEFAULT_DAY_FORMAT
      described_class.month_format = described_class::DEFAULT_MONTH_FORMAT
      described_class.year_format = described_class::DEFAULT_YEAR_FORMAT
    end

    context 'with negative num days back' do
      let(:num_days_back) { -3 }
      subject { described_class.new(target_date, num_days_back) }

      describe '#day' do
        let(:result) do
          "\#{format(plusDays(@scheduledStartTime, 3), \"#{day_format}\")}"
        end

        it { expect(subject.day).to eq(result) }
      end

      describe '#month' do
        let(:result) do
          "\#{format(plusDays(@scheduledStartTime, 3), "\
          "\"#{month_format}\")}/[0-9]+"
        end

        it { expect(subject.month).to eq(result) }
      end

      describe '#year' do
        let(:result) do
          "\#{format(plusDays(@scheduledStartTime, 3), "\
          "\"#{year_format}\")}/[0-9]+/[0-9]+"
        end

        it { expect(subject.year).to eq(result) }
      end
    end
  end
end
-------------------------------------------------------------------------------- /spec/lib/pipely/pipeline_date_time/pipeline_day_range_spec.rb: --------------------------------------------------------------------------------
# encoding: utf-8
require 'pipely/pipeline_date_time/pipeline_day_range'

describe Pipely::PipelineDateTime::PipelineDayRange do
  let(:target_date) { '@scheduledStartTime' }
  let(:days_back_start) { 2 }
  let(:days_back_end) { 0 }
  subject { described_class.new(target_date, days_back_start, days_back_end) }

  describe '#days' do
    let(:expected_days) do
      [
        "\#{format(@scheduledStartTime, \"YYYY/MM/dd\")}",
        "\#{format(minusDays(@scheduledStartTime, 1), \"YYYY/MM/dd\")}",
        "\#{format(minusDays(@scheduledStartTime, 2), \"YYYY/MM/dd\")}"
      ]
    end

    it 'returns the expected value' do
      expect(subject.days).to eq expected_days
    end
  end
end
-------------------------------------------------------------------------------- /spec/lib/pipely/pipeline_date_time/pipeline_month_range_spec.rb: --------------------------------------------------------------------------------
# encoding: utf-8
require 'pipely/pipeline_date_time/pipeline_month_range'

describe Pipely::PipelineDateTime::PipelineMonthRange do
  let(:target_date) { '@scheduledStartTime' }

  context 'with 59 days between start and end' do
    let(:days_back_start) { 59 }
    let(:days_back_end) { 0 }
    subject { described_class.new(target_date, days_back_start, days_back_end) }

    describe '#start' do
      it { expect(subject.start).to eq 29 }
    end

    describe '#end' do
      it { expect(subject.end).to eq 30 }
    end

    describe '#months' do
      it { expect(subject.months).to eq [] }
    end
  end

  context 'with 60 days between start and end' do
    let(:days_back_start) { 62 }
    let(:days_back_end) { 2 }
    subject { described_class.new(target_date, days_back_start, days_back_end) }

    describe '#start' do
      it { expect(subject.start).to eq 32 }
    end

    describe '#end' do
      it { expect(subject.end).to eq 32 }
    end

    describe '#months' do
      let(:expected_months) do
        ["\#{format(minusDays(@scheduledStartTime, 32), \"YYYY/MM\")}/[0-9]+"]
      end

      it { expect(subject.months).to eq expected_months }
    end
  end

  context 'with 87 days between start and end' do
    let(:days_back_start) { 90 }
    let(:days_back_end) { 3 }
    subject { described_class.new(target_date, days_back_start, days_back_end) }

    describe '#start' do
      it { expect(subject.start).to eq 60 }
    end

    describe '#end' do
      it { expect(subject.end).to eq 33 }
    end

    describe '#months' do
      let(:expected_months) do
        ["\#{format(minusDays(@scheduledStartTime, 33), \"YYYY/MM\")}/[0-9]+"]
      end

      it { expect(subject.months).to eq expected_months }
    end
  end

  context 'with 88 days between start and end' do
    let(:days_back_start) { 92 }
    let(:days_back_end) { 4 }
    subject { described_class.new(target_date, days_back_start, days_back_end) }

    describe '#start' do
      it { expect(subject.start).to eq 62 }
    end

    describe '#end' do
      it { expect(subject.end).to eq 34 }
    end

    describe '#months' do
      let(:expected_months) do
        [
          "\#{format(minusDays(@scheduledStartTime, 34), \"YYYY/MM\")}/[0-9]+",
          "\#{format(minusDays(@scheduledStartTime, 62), \"YYYY/MM\")}/[0-9]+"
        ]
      end

      it { expect(subject.months).to eq expected_months }
    end
  end
end
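
The `start`/`end` values asserted above are consistent with trimming 30 day-offsets from each end of the range and stepping 28-day month windows across the remainder. A worked check against the '88 days between start and end' context (a rule inferred from the expectations, not the class's literal source):

# Inferred rule, checked against the context above:
days_back_start, days_back_end = 92, 4
month_end   = days_back_end + 30    # => 34, matches subject.end
month_start = days_back_start - 30  # => 62, matches subject.start
offsets     = (month_end..month_start).step(28).to_a  # => [34, 62]
# ...which are exactly the minusDays offsets in expected_months.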
-------------------------------------------------------------------------------- /spec/lib/pipely/pipeline_date_time/pipeline_year_range_spec.rb: --------------------------------------------------------------------------------
# encoding: utf-8
require 'pipely/pipeline_date_time/pipeline_year_range'

describe Pipely::PipelineDateTime::PipelineYearRange do
  let(:target_date) { '@scheduledStartTime' }

  context 'with 729 days between start and end' do
    let(:days_back_start) { 729 }
    let(:days_back_end) { 0 }
    subject { described_class.new(target_date, days_back_start, days_back_end) }

    describe '#start' do
      it { expect(subject.start).to eq 364 }
    end

    describe '#end' do
      it { expect(subject.end).to eq 365 }
    end

    describe '#years' do
      it { expect(subject.years).to eq [] }
    end
  end

  context 'with 730 days between start and end' do
    let(:days_back_start) { 731 }
    let(:days_back_end) { 1 }
    subject { described_class.new(target_date, days_back_start, days_back_end) }

    describe '#start' do
      it { expect(subject.start).to eq 366 }
    end

    describe '#end' do
      it { expect(subject.end).to eq 366 }
    end

    describe '#years' do
      let(:expected_years) do
        ["\#{format(minusDays(@scheduledStartTime, 366), "\
         "\"YYYY\")}/[0-9]+/[0-9]+"]
      end

      it { expect(subject.years).to eq expected_years }
    end
  end

  context 'with 1094 days between start and end' do
    let(:days_back_start) { 1096 }
    let(:days_back_end) { 2 }
    subject { described_class.new(target_date, days_back_start, days_back_end) }

    describe '#start' do
      it { expect(subject.start).to eq 731 }
    end

    describe '#end' do
      it { expect(subject.end).to eq 367 }
    end

    describe '#years' do
      let(:expected_years) do
        ["\#{format(minusDays(@scheduledStartTime, 367), "\
         "\"YYYY\")}/[0-9]+/[0-9]+"]
      end

      it { expect(subject.years).to eq expected_years }
    end
  end

  context 'with 1095 days between start and end' do
    let(:days_back_start) { 1098 }
    let(:days_back_end) { 3 }
    subject { described_class.new(target_date, days_back_start, days_back_end) }

    describe '#start' do
      it { expect(subject.start).to eq 733 }
    end

    describe '#end' do
      it { expect(subject.end).to eq 368 }
    end

    describe '#years' do
      let(:expected_years) do
        [
          "\#{format(minusDays(@scheduledStartTime, 368), "\
          "\"YYYY\")}/[0-9]+/[0-9]+",
          "\#{format(minusDays(@scheduledStartTime, 733), "\
          "\"YYYY\")}/[0-9]+/[0-9]+"
        ]
      end

      it { expect(subject.years).to eq expected_years }
    end
  end
end
-------------------------------------------------------------------------------- /spec/lib/pipely/reference_list_spec.rb: --------------------------------------------------------------------------------
require 'pipely/reference_list'

describe Pipely::ReferenceList do

  context 'given nil input' do
    subject { described_class.new(nil) }

    describe '#build_dependencies' do
      it 'returns an empty array' do
        expect(subject.build_dependencies('dependsOn')).to eq([])
      end
    end
  end

  context 'given a single reference as input' do
    subject { described_class.new({ 'ref' => 'foo' }) }

    describe '#build_dependencies' do
      it 'returns an array of the single reference' do
        expect(subject.build_dependencies('dependsOn')).to eq([
          Pipely::Dependency.new('dependsOn', 'foo'),
        ])
      end
    end
  end

  context 'given an array of references as input' do
    subject {
      described_class.new([
        { 'ref' => 'foo' },
        { 'ref' => 'bar' },
      ])
    }

    describe '#build_dependencies' do
      it 'returns an array with a dependency for each reference' do
        expect(subject.build_dependencies('dependsOn')).to eq([
          Pipely::Dependency.new('dependsOn', 'foo'),
          Pipely::Dependency.new('dependsOn', 'bar'),
        ])
      end
    end
  end

end
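
The three contexts above fix how `ReferenceList` normalizes its input: `nil` yields no dependencies, a single `{ 'ref' => ... }` hash yields one, and an array yields one per element. A minimal sketch of that normalization (shape inferred from the examples; the real method takes only the label, with the references held by the instance):

# Illustrative sketch: normalize nil / Hash / Array into Dependency objects.
def build_dependencies(label, refs)
  list = refs.is_a?(Hash) ? [refs] : Array(refs)
  list.map { |ref| Pipely::Dependency.new(label, ref['ref']) }
end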
-------------------------------------------------------------------------------- /spec/lib/pipely/tasks/upload_pipeline_as_gem_spec.rb: --------------------------------------------------------------------------------
# Copyright Swipely, Inc. All rights reserved.

require 'spec_helper'
require 'pipely/tasks/upload_pipeline_as_gem'

describe Pipely::Tasks::UploadPipelineAsGem do

  describe "#run_task" do
    subject do
      described_class.new.tap do |task|
        task.config = config
        task.s3_steps_path = s3_steps_path
        task.bucket_name = bucket_name
        task.s3_gems_path = s3_gems_path
        task.templates = templates
      end
    end
    let(:config) do
      { "bootstrap_mixins" => "Fixtures::BootstrapContexts::Simple" }
    end
    let(:gem_files) do
      {
        'packaged-gem1' => '/path/to/cache/packaged-gem1.gem',
        'built-from-source-gem1' => '/path/to/cache/built-from-source-gem1.gem',
      }
    end
    let(:bucket_name) { 'bucket-test' }
    let(:s3_steps_path) { "s3/steps" }
    let(:s3_gems_path) { "s3/gems" }
    let(:templates) { ['spec/fixtures/templates/bootstrap.sh.erb'] }

    before do
      allow(Rake::Task).to receive(:[]).with("upload_steps") do
        double(enhance: ["deploy:upload_pipeline_as_gem"])
      end

      # Resolves gems for the pipeline
      expect(Pipely::Bundler).to receive(:gem_files) { gem_files }

      # Uploads gems to S3
      expect(Pipely::Deploy::S3Uploader).to receive(:new) do
        double(
          upload: gem_files.values,
          s3_urls: gem_files.values
        )
      end

      # Compiles the ERB template, using the configured mixin
      expect(subject).to receive(:upload_to_s3).with(
        "bootstrap.sh", "one\ntwo\nthree\nsimple\n")
    end

    it "runs the task and returns the processed templates" do
      # All of the behavior is asserted by the mocks set up above
      expect(subject.run_task(true)).to eql(
        ["spec/fixtures/templates/bootstrap.sh.erb"])
    end
  end
end
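
The compiled string asserted in the `before` block ("one\ntwo\nthree\nsimple\n") is the bootstrap ERB fixture rendered against a context extended with the Simple mixin. A rough sketch of that render step (the binding capture is assumed for illustration, not the task's literal code):

require 'erb'

template = File.read('spec/fixtures/templates/bootstrap.sh.erb')
context  = Pipely::Deploy::Bootstrap.new(gem_files, 's3/steps').
             context('Fixtures::BootstrapContexts::Simple')
# Evaluate the template with the context object as self.
script = ERB.new(template).result(context.instance_eval { binding })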
-------------------------------------------------------------------------------- /spec/lib/pipely_spec.rb: --------------------------------------------------------------------------------
require 'pipely'

describe Pipely do
  let(:definition_json) { double }
  let(:filename) { 'path/to/graph.png' }
  let(:definition) { double }

  before do
    Pipely::Definition.stub(:parse).with(definition_json) { definition }
  end

  describe '.draw' do
    let(:components) { double }
    let(:definition) {
      double(:definition, :components_for_graph => components)
    }
    let(:graph) { double(:graph, :output => nil) }

    before do
      Pipely::GraphBuilder.any_instance.stub(:build).with(components) { graph }
    end

    it 'parses a JSON definition and builds a graph' do
      graph.should_receive(:output).with(:png => filename)

      described_class.draw(definition_json, filename)
    end

    context 'with component_attributes' do
      let(:component_attributes) { double }

      it 'applies the component_attributes to the definition' do
        definition.should_receive(:apply_component_attributes).
          with(component_attributes)

        described_class.draw(definition_json, filename, component_attributes)
      end
    end
  end

end
-------------------------------------------------------------------------------- /spec/spec_helper.rb: --------------------------------------------------------------------------------
require 'timecop'
require 'aws-sdk'
require 'rspec'
require 'vcr'
require 'pry'

$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')

Aws.config.update({
  region: 'us-east-1',
  credentials: Aws::Credentials.new('xxx', 'xxx'),
})

VCR.configure do |c|
  c.allow_http_connections_when_no_cassette = true
  c.cassette_library_dir = 'spec/fixtures/vcr_cassettes'
  c.hook_into :webmock
end

# WebMock's stub socket does not define the timeout accessors or the
# closed? check that the HTTP layer beneath aws-sdk touches, so patch in
# no-op versions to let stubbed requests complete.
class WebMock::StubSocket
  attr_accessor :continue_timeout, :read_timeout

  def closed?
    true
  end
end
--------------------------------------------------------------------------------
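
With `hook_into :webmock` and the cassette directory configured above, recorded AWS interactions such as spec/fixtures/vcr_cassettes/build_and_upload_gems.yml can be replayed inside an example. A minimal usage sketch (the example body is illustrative):

VCR.use_cassette('build_and_upload_gems') do
  # HTTP calls made here are served from the recorded YAML
  # instead of hitting the network.
end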