├── .circleci └── config.yml ├── .components ├── .dockerignore ├── .github └── workflows │ └── ruby.yml ├── .gitignore ├── .rubocop.yml ├── .ruby-gemset ├── .ruby-version ├── API.md ├── CONTRIBUTING.md ├── DICTIONARY.md ├── Dockerfile ├── Gemfile ├── Gemfile.lock ├── INSTALL.md ├── LICENSE.md ├── Procfile ├── README.md ├── Rakefile ├── app ├── app.rb ├── controllers.rb ├── index_app.rb ├── stylesheets │ └── application.sass └── views │ ├── category.liquid │ ├── home.liquid │ └── layouts │ └── application.erb ├── bin └── open-data-maker ├── config.ru ├── config ├── apps.rb ├── boot.rb ├── env.rb ├── puma.rb └── unicorn.rb ├── docker-compose.yml ├── lib ├── data_magic.rb ├── data_magic │ ├── category.rb │ ├── config.rb │ ├── error_checker.rb │ ├── example.rb │ ├── index.rb │ ├── index │ │ ├── builder_data.rb │ │ ├── document.rb │ │ ├── document_builder.rb │ │ ├── event_logger.rb │ │ ├── importer.rb │ │ ├── output.rb │ │ ├── repository.rb │ │ ├── row_bulk_importer.rb │ │ ├── row_importer.rb │ │ ├── row_map.rb │ │ └── super_client.rb │ ├── nested_query_builder.rb │ └── query_builder.rb ├── expression │ ├── eval.rb │ ├── expression.rb │ ├── parser.rb │ └── variables.rb ├── nested_hash.rb ├── sass_initializer.rb └── zipcode │ ├── us_zipcodes.txt │ └── zipcode.rb ├── log └── .gitkeep ├── public ├── favicon.ico └── javascripts │ ├── application.js │ ├── jquery-ujs.js │ └── jquery.js ├── sample-data ├── cities100.csv └── data.yaml ├── script ├── bomstrip.sh ├── bootstrap ├── makeutf8.sh ├── s3config.rb ├── s3pull └── s3push ├── spec ├── features │ ├── api_spec.rb │ └── web_spec.rb ├── fixtures │ ├── bom │ │ ├── bom.csv │ │ └── data.yaml │ ├── calculated_columns │ │ ├── data.yaml │ │ └── schools.csv │ ├── cities_with_yml │ │ ├── cities50.csv │ │ ├── cities51-100.csv │ │ ├── data.yml │ │ └── more.csv │ ├── cities_without_yml │ │ ├── cities50.csv │ │ ├── cities51-100.csv │ │ └── more.csv │ ├── data.rb │ ├── geo │ │ ├── data.yaml │ │ └── places.csv │ ├── geo_no_files │ │ └── data.yaml │ ├── import_with_dictionary │ │ ├── cities50.csv │ │ ├── cities51-100.csv │ │ ├── data.yaml │ │ └── more.csv │ ├── import_with_errors │ │ ├── cities4.csv │ │ └── data.yaml │ ├── import_with_null_value │ │ ├── data.yaml │ │ └── null_values.csv │ ├── import_with_options │ │ ├── cities4.csv │ │ ├── data.yaml │ │ └── more_cities.csv │ ├── invalid_utf8.csv │ ├── minimal │ │ └── data.yaml │ ├── nested2 │ │ ├── data.yaml │ │ └── school2013.csv │ ├── nested_data_type │ │ └── data.yml │ ├── nested_delta_files │ │ ├── data.yaml │ │ ├── delta │ │ │ └── latest-school-data_update1.csv │ │ ├── latest-school-data.csv │ │ ├── school2012.csv │ │ └── school2013.csv │ ├── nested_files │ │ ├── data.yaml │ │ ├── school-data.csv │ │ ├── school2011.csv │ │ ├── school2012.csv │ │ └── school2013.csv │ ├── numeric_data │ │ └── data.yaml │ ├── sample-data │ │ ├── cities100.csv │ │ └── data.yaml │ ├── school_names │ │ ├── data.yaml │ │ └── school_names.csv │ ├── schools │ │ ├── data.yaml │ │ └── schools.csv │ └── types │ │ ├── data.yaml │ │ └── places.csv ├── lib │ ├── data_magic │ │ ├── calculated_columns_spec.rb │ │ ├── config_field_types_spec.rb │ │ ├── config_spec.rb │ │ ├── create_index_spec.rb │ │ ├── error_checker_spec.rb │ │ ├── example_spec.rb │ │ ├── import_csv_spec.rb │ │ ├── import_with_delta_file_spec.rb │ │ ├── import_with_dictionary_spec.rb │ │ ├── import_with_nested_files_spec.rb │ │ ├── import_without_data_yaml_spec.rb │ │ ├── index │ │ │ ├── document_builder_spec.rb │ │ │ ├── document_spec.rb │ │ │ ├── event_logger_spec.rb │ │ │ 
├── importer_spec.rb │ │ │ └── repository_spec.rb │ │ ├── name_type_spec.rb │ │ ├── nested_and_non_nested_combos_spec.rb │ │ ├── nested_data_type_spec.rb │ │ ├── query_builder_spec.rb │ │ ├── search_name_spec.rb │ │ └── search_spec.rb │ ├── data_magic_spec.rb │ ├── expression │ │ ├── eval_spec.rb │ │ ├── parser_spec.rb │ │ └── variables_spec.rb │ ├── expression_spec.rb │ ├── nested_hash_spec.rb │ └── zipcode_spec.rb ├── spec.rake ├── spec_helper.rb └── tasks │ └── import_spec.rb ├── tasks ├── delta.rake ├── es.rake └── import.rake ├── test_coverage_snapshots ├── 10_3_19 │ ├── .last_run.json │ ├── .resultset.json │ ├── .resultset.json.lock │ ├── assets │ │ └── 0.10.2 │ │ │ ├── application.css │ │ │ ├── application.js │ │ │ ├── colorbox │ │ │ ├── border.png │ │ │ ├── controls.png │ │ │ ├── loading.gif │ │ │ └── loading_background.png │ │ │ ├── favicon_green.png │ │ │ ├── favicon_red.png │ │ │ ├── favicon_yellow.png │ │ │ ├── loading.gif │ │ │ ├── magnify.png │ │ │ └── smoothness │ │ │ └── images │ │ │ ├── ui-bg_flat_0_aaaaaa_40x100.png │ │ │ ├── ui-bg_flat_75_ffffff_40x100.png │ │ │ ├── ui-bg_glass_55_fbf9ee_1x400.png │ │ │ ├── ui-bg_glass_65_ffffff_1x400.png │ │ │ ├── ui-bg_glass_75_dadada_1x400.png │ │ │ ├── ui-bg_glass_75_e6e6e6_1x400.png │ │ │ ├── ui-bg_glass_95_fef1ec_1x400.png │ │ │ ├── ui-bg_highlight-soft_75_cccccc_1x100.png │ │ │ ├── ui-icons_222222_256x240.png │ │ │ ├── ui-icons_2e83ff_256x240.png │ │ │ ├── ui-icons_454545_256x240.png │ │ │ ├── ui-icons_888888_256x240.png │ │ │ └── ui-icons_cd0a0a_256x240.png │ └── index.html └── 3_17_20 │ ├── .last_run.json │ ├── .resultset.json │ ├── .resultset.json.lock │ ├── assets │ └── 0.10.2 │ │ ├── application.css │ │ ├── application.js │ │ ├── colorbox │ │ ├── border.png │ │ ├── controls.png │ │ ├── loading.gif │ │ └── loading_background.png │ │ ├── favicon_green.png │ │ ├── favicon_red.png │ │ ├── favicon_yellow.png │ │ ├── loading.gif │ │ ├── magnify.png │ │ └── smoothness │ │ └── images │ │ ├── ui-bg_flat_0_aaaaaa_40x100.png │ │ ├── ui-bg_flat_75_ffffff_40x100.png │ │ ├── ui-bg_glass_55_fbf9ee_1x400.png │ │ ├── ui-bg_glass_65_ffffff_1x400.png │ │ ├── ui-bg_glass_75_dadada_1x400.png │ │ ├── ui-bg_glass_75_e6e6e6_1x400.png │ │ ├── ui-bg_glass_95_fef1ec_1x400.png │ │ ├── ui-bg_highlight-soft_75_cccccc_1x100.png │ │ ├── ui-icons_222222_256x240.png │ │ ├── ui-icons_2e83ff_256x240.png │ │ ├── ui-icons_454545_256x240.png │ │ ├── ui-icons_888888_256x240.png │ │ └── ui-icons_cd0a0a_256x240.png │ └── index.html └── wait-for-it.sh /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | shared: &shared 4 | working_directory: ~/repo 5 | steps: 6 | - checkout 7 | 8 | # Restore bundle cache 9 | - restore_cache: 10 | keys: 11 | - bundle-cache-{{ checksum "Gemfile.lock" }} 12 | - bundle-cache- 13 | 14 | - run: 15 | name: Bundle Install 16 | command: bundle install --path vendor/bundle 17 | 18 | # Store bundle cache 19 | - save_cache: 20 | key: bundle-cache-{{ checksum "Gemfile.lock" }} 21 | paths: 22 | - vendor/bundle 23 | 24 | - run: 25 | name: Wait for Elasticsearch 26 | command: | 27 | while ! 
curl -sS --fail http://localhost:9200 > /dev/null 2>&1; do 28 | sleep 5 29 | done 30 | 31 | - run: 32 | name: Create Test Directory 33 | command: mkdir ~/rspec 34 | 35 | - run: 36 | name: RSpec 37 | command: bundle exec rspec --format progress --format RspecJunitFormatter -o ~/rspec/rspec.xml 38 | 39 | # Save test results for timing analysis 40 | - store_test_results: 41 | path: ~/rspec 42 | 43 | jobs: 44 | test-odm: 45 | <<: *shared 46 | docker: 47 | - image: circleci/ruby:2.6.6 48 | environment: 49 | BUNDLE_JOBS: 3 50 | BUNDLE_RETRY: 3 51 | BUNDLE_PATH: vendor/bundle 52 | RACK_ENV: test 53 | - image: elasticsearch:2.4.4 54 | environment: 55 | ES_JAVA_OPTS: "-Dmapper.allow_dots_in_name=true" 56 | 57 | 58 | workflows: 59 | version: 2 60 | build: 61 | jobs: 62 | - test-odm 63 | -------------------------------------------------------------------------------- /.components: -------------------------------------------------------------------------------- 1 | --- 2 | :orm: none 3 | :test: rspec 4 | :mock: none 5 | :script: jquery 6 | :renderer: liquid 7 | :stylesheet: sass 8 | :namespace: OpenDataMaker 9 | :migration_format: number 10 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .dockerignore 2 | real-data -------------------------------------------------------------------------------- /.github/workflows/ruby.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake 6 | # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby 7 | 8 | name: Ruby 9 | env: 10 | ES_JAVA_OPTS: "-Dmapper.allow_dots_in_name=true" 11 | BUNDLE_JOBS: 3 12 | BUNDLE_RETRY: 3 13 | BUNDLE_PATH: vendor/bundle 14 | RACK_ENV: test 15 | on: 16 | push: 17 | branches: [ dev, staging ] 18 | pull_request: 19 | branches: [ dev ] 20 | 21 | jobs: 22 | test: 23 | 24 | runs-on: ubuntu-18.04 25 | 26 | steps: 27 | - uses: actions/checkout@v2 28 | - name: Set up Ruby 29 | uses: ruby/setup-ruby@v1 30 | with: 31 | ruby-version: 2.6.6 32 | 33 | - name: Setup ElasticSearch 34 | uses: getong/elasticsearch-action@v1.2 35 | with: 36 | elasticsearch version: 2.4.4 37 | - uses: actions/cache@v2 38 | with: 39 | path: vendor/bundle 40 | key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }} 41 | restore-keys: | 42 | ${{ runner.os }}-gems- 43 | - name: Bundle install 44 | run: | 45 | bundle config path vendor/bundle 46 | bundle install --jobs 4 --retry 3 47 | - name: Wait for Elasticsearch 48 | run: | 49 | while ! 
curl -sS --fail http://localhost:9200 > /dev/null 2>&1; do 50 | sleep 5 51 | done 52 | 53 | - name: Create Test Directory 54 | run: mkdir ~/rspec 55 | 56 | - name: RSpec 57 | run: bundle exec rspec --format progress --format RspecJunitFormatter -o ~/rspec/rspec.xml 58 | 59 | - name: Save assets 60 | uses: actions/upload-artifact@v2 61 | with: 62 | name: test-output 63 | path: ~/rspec 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | log/**/* 3 | log/*.log 4 | tmp/**/* 5 | vendor/gems/* 6 | !vendor/gems/cache/ 7 | .sass-cache/* 8 | db/*.db 9 | .*.sw* 10 | .env 11 | .*.env 12 | .cfignore 13 | cf-ssh.yml 14 | .vagrant 15 | .idea/ 16 | *profile* 17 | coverage/ 18 | .bundle 19 | .rakeTasks 20 | 21 | public/stylesheets/application.css* 22 | 23 | # expect people to put their own data in /data 24 | data 25 | 26 | # another commonly used data directory 27 | real-data 28 | 29 | # contains Google API tokens 30 | client_secret.json 31 | 32 | # Docker Data files 33 | ./docker/data/es* 34 | bin -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | AllCops: 2 | Exclude: 3 | - 'bin/**/*' 4 | - 'db/**/*' 5 | Metrics/LineLength: 6 | Enabled: false 7 | Style/CommentAnnotation: 8 | Enabled: false 9 | Style/Documentation: 10 | Enabled: false 11 | Style/DotPosition: 12 | Enabled: false 13 | Style/RedundantSelf: 14 | Enabled: false 15 | Style/StringLiterals: 16 | Enabled: false 17 | -------------------------------------------------------------------------------- /.ruby-gemset: -------------------------------------------------------------------------------- 1 | open-data-maker 2 | -------------------------------------------------------------------------------- /.ruby-version: -------------------------------------------------------------------------------- 1 | 2.6.6 2 | -------------------------------------------------------------------------------- /DICTIONARY.md: -------------------------------------------------------------------------------- 1 | # Data 2 | 3 | Details about the data are specified by `DATA_PATH/data.yaml`, 4 | where `DATA_PATH` is an environment variable that may be: 5 | 6 | * `s3://username:password@bucket_name/path` 7 | * `s3://bucket_name/path` 8 | * `s3://bucket_name` 9 | * a local path like: `./data` 10 | 11 | 12 | This file is loaded the first time it is needed and then stored in memory. The contents of `data.yaml` are stored as JSON in Elasticsearch in a single document of type `config` with id `1`. 13 | 14 | The version field of this document is checked at startup. If the new config has a new version, then we delete the whole index and re-index all of the files referred to in the `data.yaml` files section. 15 | 16 | If no data.yml or data.yaml file is found, then all CSV files in `DATA_PATH` will be loaded, and all fields in their headers will be used. 17 | 18 | For an example data file, visit https://collegescorecard.ed.gov/data/ and download the full data package. A data.yaml file will be included in the ZIP file download. 19 | 20 | # Dictionary Format 21 | 22 | The data dictionary format may be (optionally) specified in the `data.yaml` file. If unspecified, all columns are imported as strings.
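For orientation, here is a minimal sketch of a complete `data.yaml` that ties these pieces together. The values are illustrative only; see [the sample data.yaml](sample-data/data.yaml) for the authoritative schema, including the exact shape of the `files` entries.

```
version: example-v1       # bumping this triggers a full re-index at startup
index: city-data          # Elasticsearch index name
api: cities               # API endpoint name
files:
  - name: cities100.csv
unique:
  - name                  # column(s) used to build each document's id
dictionary:
  name:
    source: NAME
    type: string
    description: city name
```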
23 | 24 | ## Simple Data Types 25 | 26 | ``` 27 | dictionary: 28 | name: 29 | source: COLUMN_NAME 30 | type: integer 31 | description: explanation of where this data comes from and its meaning 32 | ``` 33 | 34 | In the above example: 35 | * `source:` is the name of the column in the csv. (This doesn't have to be all caps; we just find that to be common in government datasets.) 36 | * `type:` may be `integer`, `float`, or `string` 37 | * `description:` is a text description suitable for developer documentation or information provided to data analysts 38 | 39 | ## Calculated columns 40 | 41 | Optionally, you can add "columns" by calculating fields at import based on multiple csv columns. 42 | 43 | ``` 44 | academics.program.degree.health: 45 | calculate: CIP51ASSOC or CIP51BACHL 46 | type: integer 47 | description: Associate or Bachelor's degree in Health 48 | ``` 49 | 50 | Multiple operations are supported. In the following example, suppose the columns `apples`, `oranges`, and `plums` hold a `0` value when none are available and a `1` when they are; these values can then be combined with `or` to create a data field representing whether any of them were true. 51 | 52 | ``` 53 | fruit: 54 | calculate: apples or oranges or plums 55 | type: integer 56 | description: is there any fruit available? 57 | ``` 58 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ruby:2.6.6 2 | RUN mkdir /myapp 3 | WORKDIR /myapp 4 | COPY Gemfile /myapp/Gemfile 5 | COPY Gemfile.lock /myapp/Gemfile.lock 6 | RUN bundle install 7 | COPY . /myapp 8 | RUN chmod +x /myapp/wait-for-it.sh 9 | EXPOSE 3000 -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | ruby '2.6.6' 3 | 4 | # Distribute your app as a gem 5 | # gemspec 6 | 7 | # Server requirements 8 | # gem 'thin' # or mongrel 9 | # gem 'trinidad', :platform => 'jruby' 10 | 11 | # Optional JSON codec (faster performance) 12 | # gem 'oj' 13 | 14 | # open-data-maker requirements 15 | gem 'elasticsearch' 16 | gem 'typhoeus' 17 | gem 'stretchy' 18 | gem 'hashie' 19 | gem 'cf-app-utils' 20 | #gem 'unicorn' 21 | gem 'puma', '~>4.3' 22 | gem 'safe_yaml' 23 | gem 'aws-sdk', '~> 2' 24 | gem 'actionview', '6.0.3.4' 25 | gem 'dotenv' 26 | gem 'oj' 27 | gem 'parslet' 28 | gem 'parallel' 29 | 30 | # Project requirements 31 | gem 'rake' 32 | 33 | # Component requirements 34 | gem 'sass' 35 | gem 'liquify' 36 | gem 'liquid', '= 3.0.3' 37 | gem 'erubis' 38 | 39 | # Test requirements 40 | group :test do 41 | gem 'rspec' 42 | gem 'rspec-mocks' 43 | gem 'rack-test', :require => 'rack/test' 44 | gem 'rspec_junit_formatter' 45 | gem 'simplecov', require: false 46 | end 47 | 48 | group 'dev' do 49 | gem 'ruby-prof' 50 | 51 | end 52 | # Padrino Stable Gem 53 | gem 'padrino', '~> 0.15' 54 | 55 | gem 'pry', :group => ['development', 'test'] 56 | gem 'pry-byebug', :group => ['development', 'test'] 57 | gem 'newrelic_rpm' 58 | 59 | # Or Padrino Edge 60 | # gem 'padrino', :github => 'padrino/padrino-framework' 61 | 62 | # Or Individual Gems 63 | # %w(core support gen helpers cache mailer admin).each do |g| 64 | # gem 'padrino-' + g, '0.12.5' 65 | # end 66 | -------------------------------------------------------------------------------- /Gemfile.lock:
-------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | actionview (6.0.3.4) 5 | activesupport (= 6.0.3.4) 6 | builder (~> 3.1) 7 | erubi (~> 1.4) 8 | rails-dom-testing (~> 2.0) 9 | rails-html-sanitizer (~> 1.1, >= 1.2.0) 10 | activesupport (6.0.3.4) 11 | concurrent-ruby (~> 1.0, >= 1.0.2) 12 | i18n (>= 0.7, < 2) 13 | minitest (~> 5.1) 14 | tzinfo (~> 1.1) 15 | zeitwerk (~> 2.2, >= 2.2.2) 16 | aws-eventstream (1.1.0) 17 | aws-sdk (2.11.632) 18 | aws-sdk-resources (= 2.11.632) 19 | aws-sdk-core (2.11.632) 20 | aws-sigv4 (~> 1.0) 21 | jmespath (~> 1.0) 22 | aws-sdk-resources (2.11.632) 23 | aws-sdk-core (= 2.11.632) 24 | aws-sigv4 (1.2.2) 25 | aws-eventstream (~> 1, >= 1.0.2) 26 | builder (3.2.4) 27 | byebug (11.1.3) 28 | cf-app-utils (0.6) 29 | coderay (1.1.3) 30 | concurrent-ruby (1.1.7) 31 | crass (1.0.6) 32 | diff-lcs (1.4.4) 33 | docile (1.3.4) 34 | dotenv (2.7.6) 35 | elasticsearch (5.0.5) 36 | elasticsearch-api (= 5.0.5) 37 | elasticsearch-transport (= 5.0.5) 38 | elasticsearch-api (5.0.5) 39 | multi_json 40 | elasticsearch-transport (5.0.5) 41 | faraday 42 | multi_json 43 | erubi (1.10.0) 44 | erubis (2.7.0) 45 | ethon (0.12.0) 46 | ffi (>= 1.3.0) 47 | excon (0.78.1) 48 | faraday (1.3.0) 49 | faraday-net_http (~> 1.0) 50 | multipart-post (>= 1.2, < 3) 51 | ruby2_keywords 52 | faraday-net_http (1.0.0) 53 | ffi (1.14.2) 54 | hashie (4.1.0) 55 | i18n (1.8.7) 56 | concurrent-ruby (~> 1.0) 57 | jmespath (1.4.0) 58 | liquid (3.0.3) 59 | liquify (0.2.7) 60 | liquid (>= 2.2.2) 61 | loofah (2.8.0) 62 | crass (~> 1.0.2) 63 | nokogiri (>= 1.5.9) 64 | mail (2.7.1) 65 | mini_mime (>= 0.1.1) 66 | method_source (1.0.0) 67 | mime-types (3.3.1) 68 | mime-types-data (~> 3.2015) 69 | mime-types-data (3.2020.1104) 70 | mini_mime (1.0.2) 71 | mini_portile2 (2.5.0) 72 | minitest (5.14.3) 73 | moneta (1.1.1) 74 | multi_json (1.15.0) 75 | multipart-post (2.1.1) 76 | mustermann (1.1.1) 77 | ruby2_keywords (~> 0.0.1) 78 | newrelic_rpm (6.14.0) 79 | nio4r (2.5.4) 80 | nokogiri (1.11.1) 81 | mini_portile2 (~> 2.5.0) 82 | racc (~> 1.4) 83 | oj (3.10.18) 84 | padrino (0.15.0) 85 | padrino-admin (= 0.15.0) 86 | padrino-cache (= 0.15.0) 87 | padrino-core (= 0.15.0) 88 | padrino-gen (= 0.15.0) 89 | padrino-helpers (= 0.15.0) 90 | padrino-mailer (= 0.15.0) 91 | padrino-support (= 0.15.0) 92 | padrino-admin (0.15.0) 93 | padrino-core (= 0.15.0) 94 | padrino-helpers (= 0.15.0) 95 | padrino-cache (0.15.0) 96 | moneta (~> 1.1.0) 97 | padrino-core (= 0.15.0) 98 | padrino-helpers (= 0.15.0) 99 | padrino-core (0.15.0) 100 | padrino-support (= 0.15.0) 101 | sinatra (>= 2.0.0) 102 | thor (~> 0.18) 103 | padrino-gen (0.15.0) 104 | bundler (>= 1.0, < 3) 105 | padrino-core (= 0.15.0) 106 | padrino-helpers (0.15.0) 107 | i18n (>= 0.6.7, < 2) 108 | padrino-support (= 0.15.0) 109 | tilt (>= 1.4.1, < 3) 110 | padrino-mailer (0.15.0) 111 | mail (~> 2.5) 112 | mime-types (< 4) 113 | padrino-core (= 0.15.0) 114 | padrino-support (0.15.0) 115 | parallel (1.20.1) 116 | parslet (2.0.0) 117 | pry (0.13.1) 118 | coderay (~> 1.1) 119 | method_source (~> 1.0) 120 | pry-byebug (3.9.0) 121 | byebug (~> 11.0) 122 | pry (~> 0.13.0) 123 | puma (4.3.7) 124 | nio4r (~> 2.0) 125 | racc (1.5.2) 126 | rack (2.2.3) 127 | rack-protection (2.1.0) 128 | rack 129 | rack-test (1.1.0) 130 | rack (>= 1.0, < 3) 131 | rails-dom-testing (2.0.3) 132 | activesupport (>= 4.2.0) 133 | nokogiri (>= 1.6) 134 | rails-html-sanitizer (1.3.0) 135 | loofah (~> 2.3) 136 | rake (13.0.3) 137 | 
rb-fsevent (0.10.4) 138 | rb-inotify (0.10.1) 139 | ffi (~> 1.0) 140 | rspec (3.10.0) 141 | rspec-core (~> 3.10.0) 142 | rspec-expectations (~> 3.10.0) 143 | rspec-mocks (~> 3.10.0) 144 | rspec-core (3.10.1) 145 | rspec-support (~> 3.10.0) 146 | rspec-expectations (3.10.1) 147 | diff-lcs (>= 1.2.0, < 2.0) 148 | rspec-support (~> 3.10.0) 149 | rspec-mocks (3.10.1) 150 | diff-lcs (>= 1.2.0, < 2.0) 151 | rspec-support (~> 3.10.0) 152 | rspec-support (3.10.1) 153 | rspec_junit_formatter (0.4.1) 154 | rspec-core (>= 2, < 4, != 2.12.0) 155 | ruby-prof (1.4.2) 156 | ruby2_keywords (0.0.2) 157 | safe_yaml (1.0.5) 158 | sass (3.7.4) 159 | sass-listen (~> 4.0.0) 160 | sass-listen (4.0.0) 161 | rb-fsevent (~> 0.9, >= 0.9.4) 162 | rb-inotify (~> 0.9, >= 0.9.7) 163 | simplecov (0.21.1) 164 | docile (~> 1.1) 165 | simplecov-html (~> 0.11) 166 | simplecov_json_formatter (~> 0.1) 167 | simplecov-html (0.12.3) 168 | simplecov_json_formatter (0.1.2) 169 | sinatra (2.1.0) 170 | mustermann (~> 1.0) 171 | rack (~> 2.2) 172 | rack-protection (= 2.1.0) 173 | tilt (~> 2.0) 174 | stretchy (0.7.0) 175 | elasticsearch (~> 5.0) 176 | excon (~> 0.45) 177 | thor (0.20.3) 178 | thread_safe (0.3.6) 179 | tilt (2.0.10) 180 | typhoeus (1.4.0) 181 | ethon (>= 0.9.0) 182 | tzinfo (1.2.9) 183 | thread_safe (~> 0.1) 184 | zeitwerk (2.4.2) 185 | 186 | PLATFORMS 187 | ruby 188 | 189 | DEPENDENCIES 190 | actionview (= 6.0.3.4) 191 | aws-sdk (~> 2) 192 | cf-app-utils 193 | dotenv 194 | elasticsearch 195 | erubis 196 | hashie 197 | liquid (= 3.0.3) 198 | liquify 199 | newrelic_rpm 200 | oj 201 | padrino (~> 0.15) 202 | parallel 203 | parslet 204 | pry 205 | pry-byebug 206 | puma (~> 4.3) 207 | rack-test 208 | rake 209 | rspec 210 | rspec-mocks 211 | rspec_junit_formatter 212 | ruby-prof 213 | safe_yaml 214 | sass 215 | simplecov 216 | stretchy 217 | typhoeus 218 | 219 | RUBY VERSION 220 | ruby 2.6.6p146 221 | 222 | BUNDLED WITH 223 | 1.17.2 224 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Running Open Data Maker on your computer 2 | 3 | If you just want to install and run, then you can just download a 4 | [zip file](https://github.com/18F/open-data-maker/archive/master.zip). 5 | 6 | You will still need the dependencies below, but you don't need to 7 | clone the git repo for the source code. 8 | 9 | ## Docker 10 | Local development is possible using a [Docker] container + `docker-compose` 11 | 12 | ### First run 13 | - Start docker 14 | 15 | ``` 16 | docker-compose up 17 | ``` 18 | 19 | - Run `rake import` in the `web` container. 20 | 21 | ``` 22 | docker-compose run web rake import 23 | ``` 24 | 25 | - Application is available at `http://localhost:3000` 26 | 27 | ### After initial run 28 | 29 | ``` 30 | docker-compose up 31 | ``` 32 | 33 | - or run it detached with 34 | 35 | ``` 36 | docker-compose up -d 37 | ``` 38 | 39 | ## Install Prerequisites 40 | 41 | You can run our bootstrap script to make sure you have all the dependencies. 42 | It will also install and start up Elasticsearch: 43 | 44 | ``` 45 | script/bootstrap 46 | ``` 47 | 48 | To run Open Data Maker, you will need to have the following software installed on your computer: 49 | * [Elasticsearch] 2.4.4 50 | * [Ruby] 2.6.6 51 | 52 | **NOTE:** Open Data Maker indexing is currently very slow on ES 2.x; however, an index created on 1.x can be restored to 2.x. 53 | 54 | ### Mac OS X 55 | 56 | On a Mac, we recommend installing [RVM].
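If you set things up by hand instead of using `script/bootstrap`, a quick sanity check before continuing (a sketch; the expected versions come from this repo's configs):

```sh
ruby -v                           # expect ruby 2.6.6
curl -sS http://localhost:9200    # expect an Elasticsearch 2.4.4 JSON banner
```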
57 | 58 | If you are contributing to development, you will also need [Git]. 59 | 60 | 61 | ## Get the Source Code 62 | 63 | For development, [fork](http://help.github.com/fork-a-repo/) the repo 64 | first, then clone your fork. 65 | 66 | ``` 67 | git clone https://github.com/<your-username>/open-data-maker.git 68 | cd open-data-maker 69 | ``` 70 | 71 | ## Run the App 72 | 73 | 74 | ### Import the data 75 | 76 | To get started, you can import sample data with: 77 | 78 | `rake import` 79 | 80 | ### Start the app 81 | 82 | ``` 83 | padrino start 84 | ``` 85 | Go to: http://127.0.0.1:3000/ 86 | 87 | and you should see the text `Welcome to Open Data Maker` with a link to 88 | the API created by the [sample data](sample-data). 89 | 90 | You can verify that the import was successful by visiting 91 | http://127.0.0.1:3000/v1/cities?name=Cleveland. You should see something like: 92 | 93 | ```json 94 | { 95 | "state": "OH", 96 | "name": "Cleveland", 97 | "population": 396815, 98 | "land_area": 77.697, 99 | "location": { 100 | "lat": 41.478138, 101 | "lon": -81.679486 102 | } } 103 | ``` 104 | 105 | ### Custom Datasets 106 | 107 | While the app is running (or anytime), you can run `rake import`. For instance, if you had a `presidents/data.yaml` file, you would import 108 | it with: 109 | 110 | ```sh 111 | export DATA_PATH=presidents 112 | rake import 113 | # or, more succinctly: 114 | DATA_PATH=presidents rake import 115 | ``` 116 | 117 | To clear the data, assuming the data set had an index named "president-data": 118 | 119 | ``` 120 | rake es:delete[president-data] 121 | ``` 122 | 123 | You may alternatively delete all the indices (which could affect other apps if 124 | they are using your local Elasticsearch): 125 | 126 | ``` 127 | rake es:delete[_all] 128 | ``` 129 | 130 | The data directory can optionally include a file called `data.yaml` (see [the sample one](sample-data/data.yaml) for its schema) that references one or more `.csv` files and specifies data types, 131 | field name mapping, and other support data. 132 | 133 | 134 | ## Debugging 135 | 136 | Setting the `ES_DEBUG` environment variable will turn on a verbose tracer in the Elasticsearch client. 137 | 138 | Optional performance profiling for `rake import`: `rake import[profile=true]` 139 | 140 | 141 | ## Want to help? 142 | 143 | See [Contribution Guide](CONTRIBUTING.md) 144 | 145 | [Elasticsearch]: https://www.elastic.co/products/elasticsearch 146 | [Homebrew]: http://brew.sh/ 147 | [RVM]: https://github.com/wayneeseguin/rvm 148 | [rbenv]: https://github.com/sstephenson/rbenv 149 | [Ruby]: https://www.ruby-lang.org/en/ 150 | [Git]: https://git-scm.com/ 151 | [Docker]: https://www.docker.com/ 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | As a work of the United States Government, this project is in the 2 | public domain within the United States. 3 | 4 | Additionally, we waive copyright and related rights in the work 5 | worldwide through the CC0 1.0 Universal public domain dedication. 6 | 7 | ## CC0 1.0 Universal Summary 8 | 9 | This is a human-readable summary of the 10 | [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode).
11 | 12 | ### No Copyright 13 | 14 | The person who associated a work with this deed has dedicated the work to 15 | the public domain by waiving all of his or her rights to the work worldwide 16 | under copyright law, including all related and neighboring rights, to the 17 | extent allowed by law. 18 | 19 | You can copy, modify, distribute and perform the work, even for commercial 20 | purposes, all without asking permission. 21 | 22 | ### Other Information 23 | 24 | In no way are the patent or trademark rights of any person affected by CC0, 25 | nor are the rights that other persons may have in the work or in how the 26 | work is used, such as publicity or privacy rights. 27 | 28 | Unless expressly stated otherwise, the person who associated a work with 29 | this deed makes no warranties about the work, and disclaims liability for 30 | all uses of the work, to the fullest extent permitted by applicable law. 31 | When using or citing the work, you should not imply endorsement by the 32 | author or the affirmer. 33 | 34 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: bundle exec puma -C config/puma.rb 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Open Data Maker 2 | [![Build Status](https://circleci.com/gh/RTICWDT/open-data-maker/tree/dev.svg?style=svg)](https://circleci.com/gh/18F/open-data-maker/tree/dev) 3 | 4 | The goal of this project is to make it easy to turn a lot of potentially large 5 | csv files into open data via an API, along with the ability for people to download 6 | smaller csv files with a subset of the data. 7 | 8 | Preliminary research suggests that open data users (journalists and others) 9 | actually know how to work with spreadsheets really well, but a lot of the 10 | data sets that we have in government are huge. 11 | 12 | The first version of this project will allow us to host a website for an 13 | agency with a specific set of csv files, which are deployed with the app. 14 | This will allow us to deploy more quickly, since there will be a lower-risk 15 | security profile than if an agency could upload the CSV files (which might 16 | be a nice longer-term feature). 17 | 18 | 19 | ## Install and Run the App (as a developer) 20 | 21 | See our [Installation Guide](INSTALL.md) 22 | 23 | ## How this works 24 | 25 | By default, data will be loaded from /sample-data when you run `rake import` 26 | 27 | * [cities100.csv](sample-data/cities100.csv) - dataset of 100 most populous cities in the US 28 | * [data.yaml](sample-data/data.yaml) - configuration for 29 | * index name *city-data* 30 | * api endpoint name *cities* 31 | * how columns are mapped to fields in json output 32 | * data types 33 | * unique columns *name* 34 | 35 | When you run the app, you can query the dataset via json API, like: /cities?name=Chicago 36 | 37 | * http://localhost:3000/cities?name=Chicago 38 | * http://localhost:3000/cities?name=Chicago&state=IL 39 | * http://localhost:3000/cities?state=NY,MA 40 | * http://localhost:3000/cities?state=CA&fields=name,size 41 | 42 | To use your own data, you can set a different directory, for example: 43 | 44 | ``` 45 | export DATA_PATH='./data' 46 | ``` 47 | 48 | 1. Put csv files into /data 49 | 1. Import files from /data: ```rake import``` (or restart the app) 50 | 1.
There can be multiple files (must end in .csv) 51 | 1. Optional [data.yaml](sample-data/data.yaml) file that specifies index name, API endpoint, file list, and a dictionary of column -> field name mapping and types 52 | 1. Optionally import all the columns, not just the ones specified in the dictionary (see example: [import: all](spec/fixtures/import_with_options/data.yaml)) 53 | 1. If data.yaml is not provided, all files and fields will be imported, with the folder or bucket name used as the API endpoint (the name is 'slugified', with dashes replacing spaces) 54 | 1. API endpoint to get the data: `/<endpoint>?field_or_column_name=value` 55 | 56 | ## More Configuration Options 57 | 58 | Often while you are developing an API and data dictionary, 59 | it is helpful to include all the columns in the csv. If you add the following to 60 | data.yaml, the field names and types from the dictionary will be used and any 61 | unspecified columns will simply use the column name as the field name. 62 | 63 | ``` 64 | options: 65 | columns: all 66 | ``` 67 | 68 | You can use the dictionary to provide nice errors to developers who use the API. 69 | This can be used in conjunction with the above ```columns: all```: columns that 70 | are not referenced in the dictionary remain unsearchable, and 71 | referencing an unspecified field in a query causes an error to be 72 | reported. 73 | 74 | ``` 75 | options: 76 | search: dictionary_only 77 | ``` 78 | 79 | Also for debugging, you can limit the number of files that will be imported. This is helpful when the import process is time-consuming because you have many, many files; you can then test format changes with a subset of the files. 80 | 81 | ``` 82 | options: 83 | limit: 4 84 | ``` 85 | 86 | 87 | ### Acknowledgements 88 | Zipcode latitude and longitude provided by [GeoNames](http://www.geonames.org/) under a [Creative Commons Attribution 3.0 License](http://creativecommons.org/licenses/by/3.0/). 89 | 90 | ### Public domain 91 | 92 | Except as noted above, this project is in the worldwide [public domain](LICENSE.md). As stated in [CONTRIBUTING](CONTRIBUTING.md): 93 | 94 | > This project is in the public domain within the United States, and copyright and related rights in the work worldwide are waived through the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/). 95 | > 96 | > All contributions to this project will be released under the CC0 dedication. By submitting a pull request, you are agreeing to comply with this waiver of copyright interest. 97 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/setup' 2 | require 'padrino-core/cli/rake' 3 | 4 | 5 | task :default => :spec 6 | 7 | PadrinoTasks.init 8 | -------------------------------------------------------------------------------- /app/app.rb: -------------------------------------------------------------------------------- 1 | require 'csv' 2 | 3 | module OpenDataMaker 4 | class App < Padrino::Application 5 | register SassInitializer 6 | register Padrino::Helpers 7 | 8 | # This app is stateless and session cookies prevent caching of API responses 9 | disable :sessions 10 | 11 | # This app has no sensitive bits and csrf protection requires sessions 12 | disable :protect_from_csrf 13 | 14 | if ENV['DATA_AUTH'] and not ENV['DATA_AUTH'].empty?
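# DATA_AUTH is expected to hold HTTP basic-auth credentials as a single
# "username,password" string, e.g. DATA_AUTH="admin,secret" (hypothetical
# values); it is split into a username and password just below.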
15 | auth = ENV['DATA_AUTH'] 16 | authorized_user, authorized_pass = auth.split(',') 17 | use Rack::Auth::Basic, "Restricted Area" do |username, password| 18 | username == authorized_user and password == authorized_pass 19 | end 20 | end 21 | 22 | ## app setup 23 | if ENV['RACK_ENV'] == 'test' 24 | DataMagic.init(load_now: true) 25 | else 26 | DataMagic.init(load_now: false) # don't index data 27 | end 28 | 29 | end 30 | 31 | end 32 | -------------------------------------------------------------------------------- /app/index_app.rb: -------------------------------------------------------------------------------- 1 | require 'csv' 2 | 3 | module OpenDataMaker 4 | 5 | class IndexApp < Padrino::Application 6 | register SassInitializer 7 | register Padrino::Helpers 8 | 9 | enable :sessions 10 | 11 | get '/' do 12 | DataMagic.config.scoped_index_name 13 | end 14 | 15 | get '/init' do 16 | DataMagic.init(load_now: true) 17 | "ok" 18 | end 19 | 20 | get '/reindex' do 21 | DataMagic.reindex 22 | "reindexing..." 23 | end 24 | end 25 | 26 | end 27 | -------------------------------------------------------------------------------- /app/stylesheets/application.sass: -------------------------------------------------------------------------------- 1 | body 2 | -webkit-font-smoothing: antialiased 3 | font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif 4 | font-size: 1em 5 | line-height: 1.5 6 | color: #333 7 | 8 | h1, h2, h3, h4, h5, h6 9 | font-family: "Raleway", "Helvetica Neue", Helvetica, Arial, sans-serif 10 | line-height: 1.1em 11 | margin: 0 12 | text-rendering: optimizeLegibility 13 | 14 | p 15 | margin: 0 0 0.75em 16 | 17 | hr 18 | border-bottom: 1px solid silver 19 | border-left: none 20 | border-right: none 21 | border-top: none 22 | margin: 1em 0 23 | 24 | img 25 | -webkit-user-select: none 26 | cursor: zoom-in 27 | margin: 0 28 | max-width: 50% 29 | 30 | .logo 31 | height: 150px 32 | width: 150px 33 | top: 50px 34 | left: 50px 35 | z-index: 20 36 | 37 | @media screen and (max-width: 995px) 38 | .logo 39 | height: 100px 40 | width: 100px 41 | top: 40px 42 | left: 20px 43 | 44 | @media screen and (max-width: 785px) 45 | .logo 46 | height: 75px 47 | width: 75px 48 | 49 | @media screen and (max-width: 590px) 50 | .logo 51 | top: 73px 52 | 53 | @media screen and (max-width: 480px) 54 | .logo 55 | top: 16px 56 | left: 0px 57 | 58 | .bottom-margin 59 | margin-bottom: 0.5em 60 | color: #c00 61 | 62 | .title 63 | text-align: center 64 | font-family: "Raleway", "Helvetica Neue", Helvetica, Arial, sans-serif 65 | font-size: 2em 66 | line-height: 2em 67 | 68 | .header 69 | background-color: #9cf 70 | 71 | .categories .category 72 | margin: 5px 73 | padding: 15px 74 | border: solid 1px silver 75 | word-wrap: break-word 76 | display: inline-block 77 | width: 92% 78 | background-color: #ffc 79 | a 80 | color: black 81 | text-decoration: none 82 | &:visited 83 | color: black 84 | 85 | .categories__column 86 | display: inline-block 87 | width: 100% 88 | vertical-align: top 89 | -webkit-column-count: 2 90 | -moz-column-count: 2 91 | column-count: 2 92 | column-gap: .2em 93 | -webkit-column-gap: .2em 94 | -moz-column-gap: .2em 95 | 96 | .category__name 97 | font-size: 18px 98 | font-weight: bold 99 | margin-bottom: 5px 100 | color: #c00 101 | 102 | .category__fields 103 | list-style: none 104 | padding: 0 105 | 106 | .category__field-name 107 | font-size: 15px 108 | font-weight: bold 109 | margin-bottom: 2px 110 | color: #c00 111 | width: 80% 112 | 113 | .category__field-type 114 | 
font-size: 15px 115 | font-weight: bold 116 | color: #c00 117 | width: 10% 118 | float: right 119 | -------------------------------------------------------------------------------- /app/views/category.liquid: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 |
5 |

{{ title }}

6 |
7 |
8 |
9 |
10 | 11 |

Data Details for the Category

12 | 13 |
14 |
15 |
16 |
17 | 18 |
19 | Back to the list of Categories 20 | 21 | 55 | -------------------------------------------------------------------------------- /app/views/home.liquid: -------------------------------------------------------------------------------- 1 |
2 |
3 | 15 |
16 |

{{ title }}

17 |
18 |
19 |
20 |
21 | 22 |

API endpoints

23 | 24 | 29 | 30 | {% if examples.size > 0 %} 31 |

Examples

32 | 33 | 38 | {% endif %} 39 | 40 |

Browse Data Details by Category

41 |
42 |
43 |
44 | 45 | 64 | -------------------------------------------------------------------------------- /app/views/layouts/application.erb: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | <%== yield %> 10 | 11 | 12 | -------------------------------------------------------------------------------- /bin/open-data-maker: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | Dir.chdir(File.dirname(__FILE__)+'/..') 4 | 5 | # Start the app with Padrino::Server 6 | require 'rubygems' 7 | require 'bundler/setup' 8 | require 'padrino-core/cli/launcher' 9 | 10 | ARGV.unshift('start') if ARGV.first.nil? || ARGV.first.start_with?('-') 11 | Padrino::Cli::Launcher.start ARGV 12 | 13 | # Start the app with Rack::Server 14 | #require "rack" 15 | #Rack::Server.start 16 | -------------------------------------------------------------------------------- /config.ru: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rackup 2 | # encoding: utf-8 3 | 4 | # This file can be used to start Padrino, 5 | # just execute it from the command line. 6 | 7 | require File.expand_path("../config/boot.rb", __FILE__) 8 | 9 | run Padrino.application 10 | -------------------------------------------------------------------------------- /config/apps.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file mounts each app in the Padrino project to a specified sub-uri. 3 | # You can mount additional applications using any of these commands below: 4 | # 5 | # Padrino.mount('blog').to('/blog') 6 | # Padrino.mount('blog', :app_class => 'BlogApp').to('/blog') 7 | # Padrino.mount('blog', :app_file => 'path/to/blog/app.rb').to('/blog') 8 | # 9 | # You can also map apps to a specified host: 10 | # 11 | # Padrino.mount('Admin').host('admin.example.org') 12 | # Padrino.mount('WebSite').host(/.*\.?example.org/) 13 | # Padrino.mount('Foo').to('/foo').host('bar.example.org') 14 | # 15 | # Note 1: Mounted apps (by default) should be placed into the project root at '/app_name'. 16 | # Note 2: If you use the host matching remember to respect the order of the rules. 17 | # 18 | # By default, this file mounts the primary app which was generated with this project. 19 | # However, the mounted app can be modified as needed: 20 | # 21 | # Padrino.mount('AppName', :app_file => 'path/to/file', :app_class => 'BlogApp').to('/') 22 | # 23 | 24 | ## 25 | # Setup global project settings for your apps. These settings are inherited by every subapp. You can 26 | # override these settings in the subapps as needed. 
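# (For example, app/index_app.rb re-enables sessions inside its own class
# body with `enable :sessions`, overriding the stateless default used by
# the main app.)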
27 | # 28 | Padrino.configure_apps do 29 | # enable :sessions 30 | set :session_secret, 'ffb8bfc2d71e2ad938950169de2757ab7b73b1cd5fbf91b4b912ae493dc5b70f' 31 | set :protection, :except => :path_traversal 32 | set :protect_from_csrf, true 33 | 34 | set :allow_origin, :any 35 | 36 | end 37 | 38 | # If needed, mount the app that does indexing 39 | if ENV['INDEX_APP'] == "enable" 40 | puts "mounting index app" 41 | Padrino.mount('OpenDataMaker::IndexApp', :app_file => Padrino.root('app/index_app.rb')).to('/index') 42 | end 43 | 44 | # Mounts the core application for this project 45 | Padrino.mount('OpenDataMaker::App', :app_file => Padrino.root('app/app.rb')).to('/') 46 | -------------------------------------------------------------------------------- /config/boot.rb: -------------------------------------------------------------------------------- 1 | require_relative 'env.rb' 2 | 3 | ## 4 | # ## Enable devel logging 5 | # 6 | # Padrino::Logger::Config[:development][:log_level] = :devel 7 | # Padrino::Logger::Config[:development][:log_static] = true 8 | # 9 | # ## Configure your I18n 10 | # 11 | # I18n.default_locale = :en 12 | # I18n.enforce_available_locales = false 13 | # 14 | # ## Configure your HTML5 data helpers 15 | # 16 | # Padrino::Helpers::TagHelpers::DATA_ATTRIBUTES.push(:dialog) 17 | # text_field :foo, :dialog => true 18 | # Generates: 19 | # 20 | # ## Add helpers to mailer 21 | # 22 | # Mail::Message.class_eval do 23 | # include Padrino::Helpers::NumberHelpers 24 | # include Padrino::Helpers::TranslationHelpers 25 | # end 26 | 27 | ## 28 | # Add your before (RE)load hooks here 29 | # 30 | Padrino.before_load do 31 | end 32 | 33 | ## 34 | # Add your after (RE)load hooks here 35 | # 36 | Padrino.after_load do 37 | end 38 | 39 | Padrino.load! 40 | -------------------------------------------------------------------------------- /config/env.rb: -------------------------------------------------------------------------------- 1 | # define core environment that we need in tests and for the app 2 | 3 | # Defines our constants 4 | ENV['RACK_ENV'] ||= 'development' 5 | RACK_ENV = ENV['RACK_ENV'] unless defined?(RACK_ENV) 6 | PADRINO_ROOT = File.expand_path('../..', __FILE__) unless defined?(PADRINO_ROOT) 7 | 8 | # Load our dependencies 9 | require 'rubygems' unless defined?(Gem) 10 | require 'bundler/setup' 11 | require 'newrelic_rpm' 12 | Bundler.require(:default, RACK_ENV) 13 | 14 | # do this early so we can log during startup 15 | require './lib/data_magic/config.rb' 16 | DataMagic::Config.logger=Logger.new(STDOUT) if ENV['VCAP_APPLICATION'] # Cloud Foundry 17 | -------------------------------------------------------------------------------- /config/puma.rb: -------------------------------------------------------------------------------- 1 | workers Integer(ENV['WEB_CONCURRENCY'] || 2) 2 | threads_count = Integer(ENV['MAX_THREADS'] || 5) 3 | threads threads_count, threads_count 4 | worker_timeout 30 5 | 6 | preload_app! 
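# With preload_app!, the app is loaded once in the master process and then
# forked into each worker. If per-worker state (e.g. a connection) ever needs
# re-establishing, Puma's on_worker_boot hook is the usual place -- a sketch,
# not currently needed by this app:
#
#   on_worker_boot do
#     # re-open per-worker resources here
#   end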
7 | 8 | rackup DefaultRackup 9 | port ENV['PORT'] || 3000 10 | environment ENV['RACK_ENV'] || 'development' 11 | -------------------------------------------------------------------------------- /config/unicorn.rb: -------------------------------------------------------------------------------- 1 | worker_processes 5 2 | timeout 30 3 | preload_app true 4 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | odm-elasticsearch: 4 | image: elasticsearch:2.4.4 5 | container_name: odm-elasticsearch 6 | environment: 7 | ES_JAVA_OPTS: "-Dmapper.allow_dots_in_name=true -Xms512m -Xmx512m" 8 | discovery.type: single-node 9 | ports: 10 | - "9200:9200" 11 | volumes: 12 | - ./.docker/data/es:/usr/share/elasticsearch/data 13 | web: 14 | build: . 15 | command: bash -c 16 | "./wait-for-it.sh odm-elasticsearch:9200 && bundle exec padrino start -h 0.0.0.0" 17 | volumes: 18 | - .:/myapp 19 | ports: 20 | - "3000:3000" 21 | environment: 22 | - ES_URI=odm-elasticsearch 23 | - RACK_ENV=test # Set env to ensure ES configs with replicas 0 24 | depends_on: 25 | - odm-elasticsearch -------------------------------------------------------------------------------- /lib/data_magic/category.rb: -------------------------------------------------------------------------------- 1 | Category = Struct.new(:category_id) do 2 | def assemble 3 | category_entry = DataMagic.config.data['categories'][category_id] 4 | dictionary = DataMagic.config.dictionary 5 | field_details = {} 6 | category_entry['fields'].each do |field_name| 7 | field_details[field_name] = dictionary[field_name] || { "description"=>"" } 8 | end 9 | field_details = { "field_details" => field_details } 10 | assemble = category_entry.merge(field_details) 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /lib/data_magic/error_checker.rb: -------------------------------------------------------------------------------- 1 | require 'action_view' 2 | include ActionView::Helpers::SanitizeHelper 3 | module DataMagic 4 | module ErrorChecker 5 | class << self 6 | def check(params, options, config) 7 | report_required_params_absent(options) + 8 | report_nonexistent_params(params, config) + 9 | report_nonexistent_operators(params) + 10 | report_nonexistent_fields(options[:fields], config) + 11 | report_bad_range_argument(params) + 12 | report_wrong_field_type(params, config) + 13 | report_wrong_zip(options) + 14 | report_distance_requires_zip(options) 15 | end 16 | 17 | private 18 | 19 | def report_required_params_absent(options) 20 | if options[:command] == 'stats' && options[:fields].length == 0 21 | [build_error(error: 'invalid_or_incomplete_parameters', input: options[:command])] 22 | else 23 | [] 24 | end 25 | end 26 | 27 | def report_distance_requires_zip(params) 28 | # if distance, must have zip 29 | return [] if (params[:distance] && params[:zip]) || (!params[:distance]) 30 | [build_error( 31 | error: 'distance_error' 32 | )] 33 | end 34 | 35 | def report_wrong_zip(params) 36 | return [] if !params[:zip] || Zipcode.valid?(params[:zip]) 37 | [build_error( 38 | error: 'zipcode_error', 39 | parameter: :zip, 40 | input: params[:zip].to_s 41 | )] 42 | end 43 | 44 | def report_nonexistent_params(params, config) 45 | return [] unless config.dictionary_only_search? 46 | params.keys.reject { |p| config.field_type(strip_op(p)) }. 
47 | map { |p| build_error(error: 'parameter_not_found', input: strip_op(p)) } 48 | end 49 | 50 | def report_nonexistent_operators(params) 51 | params.keys.select { |p| p =~ /__(\w+)$/ && $1 !~ /range|not|ne/i }. 52 | map do |p| 53 | (param, op) = p.match(/^(.*)__(\w+)$/).captures 54 | build_error(error: 'operator_not_found', parameter: param, input: op) 55 | end 56 | end 57 | 58 | def report_nonexistent_fields(fields, config) 59 | if fields && !fields.empty? && config.dictionary_only_search? 60 | fields.reject { |f| config.field_type(f.to_s) }. 61 | map { |f| build_error(error: 'field_not_found', input: f.to_s) } 62 | else 63 | [] 64 | end 65 | end 66 | 67 | def report_bad_range_argument(params) 68 | ranges = params.select do |p,v| 69 | p =~ /__range$/ and 70 | v !~ / ^(-?\d+(\.\d+)?)? # optional starting number 71 | \.\. # range dots 72 | (-?\d+(\.\d+)?)? # optional ending number 73 | (,(\d+(\.\d+)?)?\.\.(\d+(\.\d+)?)?)* # and more, with commas 74 | $/x 75 | end 76 | ranges.map do |p,v| 77 | build_error(error: 'range_format_error', parameter: strip_op(p), input: v) 78 | end 79 | end 80 | 81 | def report_wrong_field_type(params, config) 82 | bad_fields = params.select do |p, v| 83 | next false if p =~ /__range$/ 84 | param_type = config.field_type(strip_op(p)) 85 | value_type = guess_value_type(v) 86 | (param_type == "float" && value_type != "float" && value_type != "integer") or 87 | (param_type == "integer" && value_type != "integer") 88 | end 89 | bad_fields.map do |p, v| 90 | build_error(error: 'parameter_type_error', parameter: p, input: v, 91 | expected_type: config.field_type(strip_op(p)), 92 | input_type: guess_value_type(v)) 93 | end 94 | end 95 | 96 | def build_error(opts) 97 | opts[:input] = sanitize_error_opts(opts[:input]) unless opts[:input].nil? 98 | opts[:parameter] = sanitize_error_opts(opts[:parameter]) unless opts[:parameter].nil? 99 | opts[:message] = 100 | case opts[:error] 101 | when 'invalid_or_incomplete_parameters' 102 | "The command #{opts[:input]} requires a fields parameter." 103 | when 'parameter_not_found' 104 | "The input parameter '#{opts[:input]}' is not known in this dataset." 105 | when 'field_not_found' 106 | "The input field '#{opts[:input]}' (in the fields parameter) is not a field in this dataset." 107 | when 'operator_not_found' 108 | "The input operator '#{opts[:input]}' (appended to the parameter '#{opts[:parameter]}') is not known or supported. (Known operators: range, ne, not)" 109 | when 'parameter_type_error' 110 | "The parameter '#{opts[:parameter]}' expects a value of type #{opts[:expected_type]}, but received '#{opts[:input]}' which is a value of type #{opts[:input_type]}." 111 | when 'range_format_error' 112 | "The range '#{opts[:input]}' supplied to parameter '#{opts[:parameter]}' isn't in the correct format." 113 | when 'zipcode_error' 114 | "The provided zipcode, '#{opts[:input]}', is not valid." 115 | when 'distance_error' 116 | "Use of the 'distance' parameter also requires a 'zip' parameter." 
117 | end 118 | opts 119 | end 120 | 121 | def guess_value_type(value) 122 | case value.to_s 123 | when /^-?\d+$/ 124 | "integer" 125 | when /^(-?\d+,?)+$/ # list of integers 126 | "integer" 127 | when /^-?\d+\.\d+$/ 128 | "float" 129 | else 130 | "string" 131 | end 132 | end 133 | 134 | def strip_op(param) 135 | param.sub(/__\w+$/, '') 136 | end 137 | 138 | def sanitize_error_opts(html) 139 | strip_tags(html.to_s) 140 | end 141 | 142 | end 143 | end 144 | end 145 | -------------------------------------------------------------------------------- /lib/data_magic/example.rb: -------------------------------------------------------------------------------- 1 | class Example < Hashie::Mash 2 | include Hashie::Extensions::Coercion 3 | include Hashie::Extensions::MergeInitializer 4 | coerce_key :name, String 5 | coerce_key :description, String 6 | coerce_key :params, String 7 | coerce_key :endpoint, String 8 | coerce_key :link, String 9 | def initialize(hash = {}) 10 | super 11 | # we want to use this in a liquid template 12 | # so all attributes needs to be plain data, not code 13 | self[:link] = "/v1/#{endpoint}?#{params}" if self[:link].nil? 14 | end 15 | 16 | end 17 | -------------------------------------------------------------------------------- /lib/data_magic/index/builder_data.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class BuilderData 4 | attr_reader :data, :options 5 | 6 | def initialize(data, options) 7 | @options = options 8 | @data = data 9 | end 10 | 11 | def additional_fields 12 | options[:mapping] || {} 13 | end 14 | 15 | def new_field_names 16 | field_names = options[:fields] || {} 17 | field_names.merge(additional_fields) 18 | end 19 | 20 | def additional_data 21 | options[:add_data] 22 | end 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /lib/data_magic/index/document.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class Document 4 | attr_reader :data, :id 5 | 6 | def initialize(data) 7 | @data = data 8 | @id = calculate_id 9 | end 10 | 11 | def remove_ids 12 | config.data['unique'].each { |key| data.delete key } 13 | end 14 | 15 | def headers 16 | data.keys.map(&:to_s) # does this only return top level fields? 17 | end 18 | 19 | def preview(n=500) 20 | data.inspect[0..n] 21 | end 22 | 23 | def id_empty? 24 | id && id.empty? 
25 | end 26 | 27 | private 28 | 29 | def calculate_id 30 | return nil if config.data['unique'].length == 0 31 | config.data['unique'].map { |field| data[field] }.join(':') 32 | end 33 | 34 | def config 35 | DataMagic.config 36 | end 37 | end 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /lib/data_magic/index/event_logger.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class EventLogger 4 | def trigger(event, *args) 5 | self.send(event, *args) 6 | end 7 | 8 | ['debug', 'info', 'warn', 'error'].each do |level| 9 | class_eval <<-RUBY, __FILE__, __LINE__ + 1 10 | def #{level}(message, object=nil, limit=nil) 11 | logger.#{level}(full_message(message, object, limit)) 12 | end 13 | RUBY 14 | end 15 | 16 | def full_message(prefix, object, limit) 17 | return prefix unless object 18 | message = "#{prefix}: " 19 | if limit 20 | message << object.inspect[0..limit] 21 | else 22 | message << object.inspect 23 | end 24 | message 25 | end 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /lib/data_magic/index/importer.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | module DataMagic 4 | module Index 5 | class Importer 6 | attr_reader :raw_data, :options, :row_map 7 | 8 | def initialize(raw_data, options, row_map) 9 | @raw_data = raw_data 10 | @options = options 11 | @row_map = row_map 12 | end 13 | 14 | def process 15 | setup 16 | parse_and_log 17 | finish! 18 | [row_count, headers] 19 | end 20 | 21 | def client 22 | @client ||= SuperClient.new(es_client, options) 23 | end 24 | 25 | def builder_data 26 | @builder_data ||= BuilderData.new(raw_data, options) 27 | end 28 | 29 | def row_map 30 | @row_map || {} 31 | end 32 | 33 | def output 34 | @output ||= Output.new 35 | end 36 | 37 | def parse_and_log 38 | parse_csv 39 | rescue InvalidData => e 40 | trigger("error", e.message) 41 | raise InvalidData, "invalid file format" if empty? 42 | end 43 | 44 | def chunk_size 45 | (ENV['CHUNK_SIZE'] || 100).to_i 46 | end 47 | 48 | def nprocs 49 | (ENV['NPROCS'] || 1).to_i 50 | end 51 | 52 | def parse_csv 53 | if nprocs == 1 54 | parse_csv_whole 55 | elsif client.nested_partial? 56 | parse_csv_mapped 57 | else 58 | parse_csv_chunked 59 | end 60 | data.close 61 | end 62 | 63 | def parse_csv_whole 64 | CSV.new( 65 | data, 66 | headers: true, 67 | header_converters: lambda { |str| str.strip.to_sym } 68 | ).each do |row| 69 | dispatch_row_importer(row) 70 | break if at_limit? 71 | end 72 | end 73 | 74 | def parse_csv_chunked 75 | CSV.new( 76 | data, 77 | headers: true, 78 | header_converters: lambda { |str| str.strip.to_sym } 79 | ).each.each_slice(chunk_size) do |chunk| 80 | break if at_limit? 
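# Each CSV chunk is split into roughly equal slices, one per worker process
# (NPROCS); Parallel imports the slices concurrently, so rows within a chunk
# are processed independently and in no guaranteed order.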
81 | chunks_per_proc = (chunk.size / nprocs.to_f).ceil 82 | Parallel.each(chunk.each_slice(chunks_per_proc)) do |rows| 83 | rows.each_with_index do |row, idx| 84 | dispatch_row_importer(row) 85 | end 86 | end 87 | if !headers 88 | single_document = DocumentBuilder.create(chunk.first, builder_data, DataMagic.config) 89 | set_headers(single_document) 90 | end 91 | increment(chunk.size) 92 | end 93 | end 94 | 95 | def parse_csv_mapped 96 | rocky_chunks = CSV.new( 97 | data, 98 | headers: true, 99 | header_converters: lambda { |str| str.strip.to_sym } 100 | ).chunk_while { |a, b| 101 | # chunk by nested document link 102 | lookup_row_id(a) === lookup_row_id(b) 103 | }.to_a 104 | 105 | # rearrange chunks for parallel processing, so our slices are 'roughly' the same size 106 | sorted = rocky_chunks.sort_by(&:size) 107 | grouped = sorted.each.each_with_index.group_by { |_, index| index % nprocs } 108 | smooth_chunks = grouped.map { |_, data| 109 | # keep only the chunk arrays; each_with_index added an index item we don't want 110 | data.map(&:first) 111 | }.flatten(1) 112 | 113 | chunks_per_proc = (smooth_chunks.size / nprocs.to_f).ceil 114 | 115 | Parallel.each(smooth_chunks.each_slice(chunks_per_proc)) do |chunks| 116 | chunks.each do |chunk| 117 | dispatch_row_importer(chunk) 118 | end 119 | end 120 | increment(smooth_chunks.size) 121 | end 122 | 123 | def dispatch_row_importer(row) 124 | if client.nested_partial? 125 | if row.is_a?(Array) 126 | dispatch_row_bulk_importer(row) 127 | else 128 | row_id = lookup_row_id(row) 129 | Array(row_map.map[row_id]).each do |related_id| 130 | row << [row_map.id, related_id] 131 | RowImporter.process(row, self) 132 | end 133 | end 134 | else 135 | RowImporter.process(row, self) 136 | end 137 | end 138 | 139 | def dispatch_row_bulk_importer(rows) 140 | row_id = lookup_row_id(rows[0]) 141 | Array(row_map.map[row_id]).each do |related_id| 142 | rows.each do |row| 143 | row << [row_map.id, related_id] 144 | end 145 | RowBulkImporter.process(rows, self) 146 | end 147 | end 148 | 149 | def lookup_row_id(row) 150 | link = row_map.calculate_column(options[:partial_map]['link']) 151 | row.to_hash[link] 152 | end 153 | 154 | def setup 155 | client.create_index 156 | log_setup 157 | end 158 | 159 | def finish! 160 | validate! 161 | refresh_index if ENV['RACK_ENV'] == 'test' 162 | log_finish 163 | end 164 | 165 | def log_setup 166 | opts = options.reject { |k,v| k == :mapping } 167 | trigger("info", "options", opts) 168 | trigger("info", "new_field_names", new_field_names) 169 | trigger("info", "additional_data", additional_data) 170 | end 171 | 172 | def log_finish 173 | trigger("info", "skipped (missing parent id)", output.skipped) if !output.skipped.empty? 174 | trigger('info', "done #{row_count} rows") 175 | end 176 | 177 | def event_logger 178 | @event_logger ||= EventLogger.new 179 | end 180 | 181 | def at_limit?
182 | options[:limit_rows] && row_count == options[:limit_rows] 183 | end 184 | 185 | extend Forwardable 186 | 187 | def_delegators :output, :set_headers, :skipping, :skipped, :increment, :row_count, :log_limit, 188 | :empty?, :validate!, :headers 189 | def_delegators :builder_data, :data, :new_field_names, :additional_data 190 | def_delegators :client, :refresh_index 191 | def_delegators :event_logger, :trigger 192 | 193 | def self.process(*args) 194 | new(*args).process 195 | end 196 | 197 | private 198 | 199 | def es_client 200 | DataMagic.client 201 | end 202 | end 203 | end 204 | end 205 | -------------------------------------------------------------------------------- /lib/data_magic/index/output.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class Output 4 | attr_reader :row_count, :headers, :skipped 5 | 6 | def initialize 7 | @row_count = 0 8 | @skipped = [] 9 | end 10 | 11 | def set_headers(doc) 12 | return if headers 13 | if doc.is_a?(Array) 14 | @headers = doc[0].headers 15 | else 16 | @headers = doc.headers 17 | end 18 | end 19 | 20 | def skipping(id) 21 | skipped << id 22 | end 23 | 24 | def increment(count = 1) 25 | @row_count += count 26 | end 27 | 28 | def validate! 29 | raise DataMagic::InvalidData, "zero rows" if empty? 30 | end 31 | 32 | def empty? 33 | row_count == 0 34 | end 35 | 36 | def log(doc) 37 | log_0(doc) if empty? 38 | log_marker if row_count % 500 == 0 39 | end 40 | 41 | def log_skips 42 | return if skipped.empty? 43 | logger.info "skipped (missing parent id): #{skipped.join(',')}" 44 | end 45 | 46 | def log_limit 47 | logger.info "done now, limiting rows to #{row_count}" 48 | end 49 | 50 | private 51 | 52 | def log_0(document) 53 | logger.debug "csv parsed" 54 | logger.info "row#{row_count} -> #{document.preview}" 55 | end 56 | 57 | def log_marker 58 | logger.info "indexing rows: #{row_count}..." 59 | end 60 | end 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /lib/data_magic/index/repository.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class Repository 4 | attr_reader :client, :document 5 | 6 | def initialize(client, document) 7 | @client = client 8 | @document = document 9 | end 10 | 11 | def skipped? 12 | @skipped 13 | end 14 | 15 | def save 16 | @skipped = false 17 | if client.creating? 18 | create 19 | else 20 | update 21 | end 22 | end 23 | 24 | private 25 | 26 | def update 27 | if client.allow_skips? 28 | update_with_rescue 29 | else 30 | update_without_rescue 31 | end 32 | end 33 | 34 | def create 35 | client.index({ 36 | index: client.index_name, 37 | id: document.id, 38 | type: 'document', 39 | body: document.data, 40 | timeout: '5m' 41 | }) 42 | end 43 | 44 | def update_without_rescue 45 | if client.nested_partial? 
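# nested partial rows can't be saved with a plain doc merge; they go through the scripted append in update_nested_partial below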
46 | update_nested_partial 47 | else 48 | client.update({ 49 | index: client.index_name, 50 | id: document.id, 51 | type: 'document', 52 | body: {doc: document.data}, 53 | timeout: '5m' 54 | }) 55 | end 56 | end 57 | 58 | def update_with_rescue 59 | update_without_rescue 60 | rescue Elasticsearch::Transport::Transport::Errors::NotFound 61 | @skipped = true 62 | end 63 | 64 | def update_nested_partial 65 | if document.is_a?(Array) 66 | update_bulk_nested_partial 67 | else 68 | doc = { 69 | index: client.index_name, 70 | id: document.id, 71 | type: 'document', 72 | body: {doc: document.data}, 73 | timeout: '5m' 74 | } 75 | root_key = client.options[:nest]['key'] 76 | partial_path = client.options[:partial_map]['path'] 77 | 78 | # extract some keys of the dotted path 79 | path_keys = partial_path.split('.') 80 | first = path_keys.first 81 | path_keys = path_keys.unshift(root_key) 82 | 83 | # extract the current row's nested data, in case we're appending to an existing array 84 | nested_item = document.data.dig(*path_keys)[0] 85 | 86 | # this script will either create the new nested array if it doesn't exist, or append the nested item 87 | script = "if (ctx._source['#{root_key}'].#{first} == null) { ctx._source['#{root_key}'].#{first} = data['#{root_key}'].#{first}; } else { ctx._source['#{root_key}'].#{partial_path} += inner; }" 88 | doc[:body] = { script: script, params: { inner: nested_item, data: document.data } } 89 | doc[:retry_on_conflict] = 5 90 | client.update(doc) 91 | end 92 | end 93 | 94 | def update_bulk_nested_partial 95 | root_key = client.options[:nest]['key'] 96 | partial_path = client.options[:partial_map]['path'] 97 | 98 | # extract some keys of the dotted path 99 | path_keys = partial_path.split('.') 100 | first = path_keys.first 101 | path_keys = path_keys.unshift(root_key) 102 | 103 | nested_items = document.map do |doc| 104 | doc.data.dig(*path_keys)[0] 105 | end 106 | 107 | hash = NestedHash.new 108 | hash.dotkey_set(path_keys.join('.'), nested_items) 109 | 110 | doc = { 111 | index: client.index_name, 112 | id: document[0].id, 113 | type: 'document', 114 | timeout: '5m' 115 | } 116 | # this script will either create the full object path and new nested array if it doesn't exist already, or create the new nested items array 117 | script = "if (ctx._source['#{root_key}'] == null) { ctx._source['#{root_key}'] = data['#{root_key}']; } else { if (ctx._source['#{root_key}'].#{first} == null) { ctx._source['#{root_key}'].#{first} = data['#{root_key}'].#{first}; } else { ctx._source['#{root_key}'].#{partial_path} = inner; } }" 118 | doc[:body] = { script: script, params: { inner: nested_items, data: hash } } 119 | doc[:retry_on_conflict] = 5 120 | client.update(doc) 121 | end 122 | end 123 | end 124 | end 125 | -------------------------------------------------------------------------------- /lib/data_magic/index/row_bulk_importer.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | module DataMagic 4 | module Index 5 | class RowBulkImporter 6 | attr_reader :rows, :importer 7 | 8 | def initialize(rows, importer) 9 | @rows = rows 10 | @importer = importer 11 | end 12 | 13 | def process 14 | log_row_start 15 | before_save 16 | save 17 | after_save 18 | log_row_end 19 | end 20 | 21 | def documents 22 | @documents ||= DocumentBuilder.create(rows, importer.builder_data, config) 23 | end 24 | 25 | def repository 26 | @repository ||= Repository.new(importer.client, documents) 27 | end 28 | 29 | private 30 | 31 | def 
log_row_start 32 | trigger("debug", "csv parsed") if importer.empty? 33 | trigger("info", "row #{importer.row_count}", documents, 500) if importer.row_count % 500 == 0 34 | #trigger("info", "id", document.id) 35 | if documents[0].id_empty? 36 | trigger("warn", "blank id") 37 | trigger("warn", "unique", config.data["unique"]) 38 | trigger("warn", "in row", documents, 255) 39 | end 40 | end 41 | 42 | def before_save 43 | importer.set_headers(documents) 44 | end 45 | 46 | def save 47 | repository.save 48 | end 49 | 50 | def after_save 51 | importer.skipping(documents[0].id) if repository.skipped? 52 | importer.increment 53 | end 54 | 55 | def log_row_end 56 | return if !importer.at_limit? 57 | trigger("info", "done now, limiting rows to #{importer.row_count}") 58 | end 59 | 60 | def config 61 | DataMagic.config 62 | end 63 | 64 | extend Forwardable 65 | 66 | def_delegators :importer, :trigger 67 | 68 | def self.process(*args) 69 | new(*args).process 70 | end 71 | end 72 | end 73 | end 74 | -------------------------------------------------------------------------------- /lib/data_magic/index/row_importer.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | module DataMagic 4 | module Index 5 | class RowImporter 6 | attr_reader :row, :importer 7 | 8 | def initialize(row, importer) 9 | @row = row 10 | @importer = importer 11 | end 12 | 13 | def process 14 | log_row_start 15 | before_save 16 | save 17 | after_save 18 | log_row_end 19 | end 20 | 21 | def document 22 | @document ||= DocumentBuilder.create(row, importer.builder_data, config) 23 | end 24 | 25 | def repository 26 | @repository ||= Repository.new(importer.client, document) 27 | end 28 | 29 | private 30 | 31 | def log_row_start 32 | trigger("debug", "csv parsed") if importer.empty? 33 | trigger("info", "row #{importer.row_count}", document, 500) if importer.row_count % 500 == 0 34 | #trigger("info", "id", document.id) 35 | if document.id_empty? 36 | trigger("warn", "blank id") 37 | trigger("warn", "unique", config.data["unique"]) 38 | trigger("warn", "in row", document, 255) 39 | end 40 | end 41 | 42 | def before_save 43 | importer.set_headers(document) 44 | end 45 | 46 | def save 47 | repository.save 48 | end 49 | 50 | def after_save 51 | importer.skipping(document.id) if repository.skipped? 52 | importer.increment 53 | end 54 | 55 | def log_row_end 56 | return if !importer.at_limit? 57 | trigger("info", "done now, limiting rows to #{importer.row_count}") 58 | end 59 | 60 | def config 61 | DataMagic.config 62 | end 63 | 64 | extend Forwardable 65 | 66 | def_delegators :importer, :trigger 67 | 68 | def self.process(*args) 69 | new(*args).process 70 | end 71 | end 72 | end 73 | end 74 | -------------------------------------------------------------------------------- /lib/data_magic/index/row_map.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class RowMap 4 | attr_reader :map, :id, :related 5 | 6 | def initialize(primary_key, join_key) 7 | @id = calculate_column(primary_key) 8 | @related = calculate_column(join_key) 9 | @map = {} 10 | end 11 | 12 | def add_item(row) 13 | # only add unique ids to the related key array 14 | @map[row[@related]] = (@map[row[@related]] ||= []) | [row[@id]] 15 | end 16 | 17 | def map 18 | @map 19 | end 20 | 21 | def calculate_column(value) 22 | column_name = DataMagic::config.field_mapping.invert[value] 23 | column_name.to_sym unless column_name.nil? 
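# e.g. with a dictionary entry `id: UNITID` (as in the fixtures), field_mapping inverts to { 'id' => 'UNITID' }, so calculate_column('id') returns :UNITID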
24 | end 25 | 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /lib/data_magic/index/super_client.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | module DataMagic 4 | module Index 5 | class SuperClient 6 | attr_reader :client, :options 7 | 8 | def initialize(client, options) 9 | @client = client 10 | @options = options 11 | end 12 | 13 | def create_index 14 | DataMagic.create_index unless config.index_exists? 15 | end 16 | 17 | def refresh_index 18 | client.indices.refresh index: index_name 19 | end 20 | 21 | def creating? 22 | options[:root] || options[:nest] == nil 23 | end 24 | 25 | def nested_partial? 26 | options[:map] 27 | end 28 | 29 | def allow_skips? 30 | options[:nest][:parent_missing] == 'skip' 31 | end 32 | 33 | def index_name 34 | config.scoped_index_name 35 | end 36 | 37 | def config 38 | DataMagic.config 39 | end 40 | 41 | extend Forwardable 42 | 43 | def_delegators :client, :index, :update 44 | end 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /lib/expression/eval.rb: -------------------------------------------------------------------------------- 1 | 2 | class Expression 3 | class Eval < Parslet::Transform 4 | rule(:var => simple(:var)) { 5 | variables[String(var)] 6 | } 7 | 8 | # in Ruby 0 is 'truthy' but that's not what most people expect 9 | rule(:or => { :left => subtree(:left), :right => subtree(:right) }) do 10 | left == 0 ? right : (left or right) 11 | end 12 | 13 | rule(:and => { :left => subtree(:left), :right => subtree(:right) }) do 14 | left == 0 ? left : (left and right) 15 | end 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /lib/expression/expression.rb: -------------------------------------------------------------------------------- 1 | require_relative 'parser' 2 | require_relative 'eval' 3 | require_relative 'variables' 4 | require 'hashie' 5 | 6 | class Expression 7 | attr_accessor :name # purely for reporting Errors 8 | attr_reader :variables 9 | 10 | def initialize(expr, name = 'unknown') 11 | @name = name 12 | @tree = Parser.new.parse(expr) 13 | @variables = Variables.new.apply(@tree) 14 | end 15 | 16 | def evaluate(vars) 17 | Hashie.stringify_keys! vars 18 | Eval.new.apply(@tree, variables: vars) 19 | end 20 | 21 | def self.find_or_create(expr, name = 'unknown') 22 | @cached_expression ||= {} 23 | @cached_expression[expr] ||= Expression.new(expr, name) 24 | @cached_expression[expr] 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /lib/expression/parser.rb: -------------------------------------------------------------------------------- 1 | require 'parslet' 2 | # based on https://github.com/kschiess/parslet/blob/master/example/boolean_algebra.rb 3 | # usage: 4 | # def parse(str) 5 | # Expression::Parser.new.parse(str) 6 | # 7 | # rescue Parslet::ParseFailed => failure 8 | # puts failure.cause.ascii_tree 9 | # end 10 | # 11 | # tree = Expression::Parser.new.parse("one or two") 12 | # => {:or=>{:left=>{:var=>"one"@0}, :right=>{:var=>"two"@7}}} 13 | # Eval.new.apply(tree, variables: {"one"=>1, "two"=>2}) 14 | # 15 | # Variables.new.apply(tree) 16 | 17 | class Expression 18 | class Parser < Parslet::Parser 19 | rule(:space) { match[" "].repeat(1) } 20 | rule(:space?) { space.maybe } 21 | 22 | rule(:lparen) { str("(") >> space? } 23 | rule(:rparen) { str(")") >> space?
} 24 | 25 | rule(:and_operator) { str("and") >> space? } 26 | rule(:or_operator) { str("or") >> space? } 27 | 28 | rule(:var) { match["[^\s\(\)]"].repeat(1).as(:var) >> space? } 29 | 30 | # The primary rule deals with parentheses. 31 | rule(:primary) { lparen >> or_operation >> rparen | var } 32 | 33 | # Note that following rules are both right-recursive. 34 | rule(:and_operation) { 35 | (primary.as(:left) >> and_operator >> 36 | and_operation.as(:right)).as(:and) | 37 | primary } 38 | 39 | rule(:or_operation) { 40 | (and_operation.as(:left) >> or_operator >> 41 | or_operation.as(:right)).as(:or) | 42 | and_operation } 43 | 44 | # We start at the lowest precedence rule. 45 | root(:or_operation) 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /lib/expression/variables.rb: -------------------------------------------------------------------------------- 1 | require 'parslet' 2 | 3 | class Expression 4 | class Variables < Parslet::Transform 5 | rule(:var => simple(:var)) { 6 | [String(var)] 7 | } 8 | rule(:or => { :left => subtree(:left), :right => subtree(:right) }) do 9 | (left + right) 10 | end 11 | 12 | rule(:and => { :left => subtree(:left), :right => subtree(:right) }) do 13 | (left + right) 14 | end 15 | 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /lib/nested_hash.rb: -------------------------------------------------------------------------------- 1 | class NestedHash < Hash 2 | 3 | def initialize(hash = {}, default = nil, &block) 4 | default ? super(default) : super(&block) 5 | self.add(hash) 6 | end 7 | 8 | def add(hash) 9 | hash.each do |full_name, value| 10 | parts = full_name.to_s.split('.') 11 | last = parts.length - 1 12 | add_to = self 13 | parts.each_with_index do |name, index| 14 | if index == last 15 | add_to[name] = value 16 | else 17 | add_to[name] ||= {} 18 | add_to = add_to[name] 19 | end 20 | end 21 | end 22 | self 23 | end 24 | 25 | # generate a flat, non-nested hash 26 | # with keys that have dots representing the hierarchy 27 | def withdotkeys(deep_hash = self, flat_hash = {}, root = '') 28 | deep_hash.each do |k, value| 29 | key = root + k 30 | if value.is_a?(Hash) 31 | flat_hash.merge! 
withdotkeys(value, flat_hash, key + '.') 32 | else 33 | flat_hash[key] = value 34 | end 35 | end 36 | flat_hash 37 | end 38 | 39 | # generate a list of the keys with dots representing the hierarchy 40 | def dotkeys(row = self, prefix = '') 41 | human_names = [] 42 | row.keys.each do |k| 43 | key = prefix + k 44 | if row[k].is_a?(Hash) 45 | new_human_names = dotkeys(row[k], key + '.') 46 | human_names += new_human_names 47 | else 48 | human_names << key 49 | end 50 | end 51 | human_names 52 | end 53 | 54 | # set a new or existing nested key's value by a dotted-string key 55 | def dotkey_set(dottedkey, value, deep_hash = self) 56 | keys = dottedkey.to_s.split('.') 57 | first = keys.first 58 | if keys.length == 1 59 | deep_hash[first] = value 60 | else 61 | # in the case that we are creating a hash from a dotted key, we'll assign a default 62 | deep_hash[first] = (deep_hash[first] || {}) 63 | dotkey_set(keys.slice(1..-1).join('.'), value, deep_hash[first]) 64 | end 65 | end 66 | end 67 | -------------------------------------------------------------------------------- /lib/sass_initializer.rb: -------------------------------------------------------------------------------- 1 | module SassInitializer 2 | def self.registered(app) 3 | # Enables support for SASS template reloading in rack applications. 4 | # See http://nex-3.com/posts/88-sass-supports-rack for more details. 5 | # Store SASS files (by default) within 'app/stylesheets'. 6 | require 'sass/plugin/rack' 7 | Sass::Plugin.options[:template_location] = Padrino.root("app/stylesheets") 8 | Sass::Plugin.options[:css_location] = Padrino.root("public/stylesheets") 9 | app.use Sass::Plugin::Rack 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /lib/zipcode/zipcode.rb: -------------------------------------------------------------------------------- 1 | # Zipcode latitude and longitude data in us_zipcodes.txt 2 | # provided by [GeoNames](http://www.geonames.org/) 3 | # under a Creative Commons Attribution 3.0 License: 4 | # http://creativecommons.org/licenses/by/3.0/ 5 | 6 | # this code is in public domain (CC0 1.0) 7 | # https://github.com/18F/open-data-maker/blob/dev/LICENSE.md 8 | 9 | require 'csv' 10 | 11 | class Zipcode 12 | @@zipcode_hash = nil 13 | 14 | def Zipcode.latlon(zipcode) 15 | zipcode = zipcode.to_s 16 | @@zipcode_hash ||= converted_zipcodes 17 | @@zipcode_hash[zipcode] 18 | end 19 | 20 | def Zipcode.valid?(zipcode) 21 | !!self.latlon(zipcode) 22 | end 23 | 24 | private 25 | def self.converted_zipcodes 26 | parsed_file = CSV.read(File.expand_path("../us_zipcodes.txt", __FILE__), { :col_sep => "\t" }) 27 | zipcode_hash = {} 28 | parsed_file.each do |row| 29 | zipcode = row[1] 30 | lat = row[9].to_f 31 | lon = row[10].to_f 32 | zipcode_hash[zipcode] = {'lat': lat, 'lon': lon} 33 | end 34 | zipcode_hash 35 | end 36 | 37 | end 38 | -------------------------------------------------------------------------------- /log/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/log/.gitkeep -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/public/favicon.ico
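A quick usage sketch for NestedHash above (illustrative only; these keys and values are invented, borrowed from names in the fixture data):

require_relative 'lib/nested_hash'
h = NestedHash.new('school.name' => 'Reichert University', 'school.zip' => 35762)
h['school']['name']   # => "Reichert University"
h.dotkeys             # => ["school.name", "school.zip"]
h.withdotkeys         # => {"school.name"=>"Reichert University", "school.zip"=>35762}
h.dotkey_set('school.city', 'Normal')
h['school']['city']   # => "Normal"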
-------------------------------------------------------------------------------- /public/javascripts/application.js: -------------------------------------------------------------------------------- 1 | // Put your application scripts here -------------------------------------------------------------------------------- /public/javascripts/jquery-ujs.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Padrino Javascript Jquery Adapter 3 | * Created for use with Padrino Ruby Web Framework (http://www.padrinorb.com) 4 | **/ 5 | 6 | /* Remote Form Support 7 | * form_for @user, '/user', :remote => true 8 | **/ 9 | 10 | $(function(){ 11 | $('form').on('submit', function(e) { 12 | var element = $(this), message = element.data('confirm'); 13 | if (message && !confirm(message)) { return false; } 14 | if (element.data('remote') == true) { 15 | e.preventDefault(); e.stopped = true; 16 | JSAdapter.sendRequest(element, { 17 | verb: element.data('method') || element.attr('method') || 'post', 18 | url: element.attr('action'), 19 | dataType: element.data('type') || ($.ajaxSettings && $.ajaxSettings.dataType) || 'script', 20 | params: element.serializeArray() 21 | }); 22 | } 23 | }); 24 | 25 | /* Confirmation Support 26 | * link_to 'sign out', '/logout', :confirm => 'Log out?' 27 | **/ 28 | 29 | $(document).on('click', 'a[data-confirm]', function(e) { 30 | var message = $(this).data('confirm'); 31 | if (!confirm(message)) { e.preventDefault(); e.stopped = true; } 32 | }); 33 | 34 | /* 35 | * Link Remote Support 36 | * link_to 'add item', '/create', :remote => true 37 | **/ 38 | 39 | $(document).on('click', 'a[data-remote=true]', function(e) { 40 | var element = $(this); 41 | if (e.stopped) return; 42 | e.preventDefault(); e.stopped = true; 43 | JSAdapter.sendRequest(element, { 44 | verb: element.data('method') || 'get', 45 | url: element.attr('href') 46 | }); 47 | }); 48 | 49 | /* 50 | * Link Method Support 51 | * link_to 'delete item', '/destroy', :method => :delete 52 | **/ 53 | 54 | $(document).on('click', 'a[data-method]:not([data-remote])', function(e) { 55 | if (e.stopped) return; 56 | JSAdapter.sendMethod($(this)); 57 | e.preventDefault(); e.stopped = true; 58 | }); 59 | 60 | /* JSAdapter */ 61 | var JSAdapter = { 62 | // Sends an xhr request to the specified url with given verb and params 63 | // JSAdapter.sendRequest(element, { verb: 'put', url : '...', params: {} }); 64 | sendRequest: function(element, options) { 65 | var verb = options.verb, url = options.url, params = options.params, dataType = options.dataType; 66 | var event = element.trigger('ajax:before'); 67 | if (event.stopped) return false; 68 | $.ajax({ 69 | url: url, 70 | type: verb.toUpperCase() || 'POST', 71 | data: params || [], 72 | dataType: dataType, 73 | 74 | beforeSend: function(request) { element.trigger('ajax:loading', [ request ]); }, 75 | complete: function(request) { element.trigger('ajax:complete', [ request ]); }, 76 | success: function(request) { element.trigger('ajax:success', [ request ]); }, 77 | error: function(request) { element.trigger('ajax:failure', [ request ]); } 78 | }); 79 | element.trigger('ajax:after'); 80 | }, 81 | // Triggers a particular method verb to be triggered in a form posting to the url 82 | // JSAdapter.sendMethod(element); 83 | sendMethod: function(element) { 84 | var verb = element.data('method'); 85 | var url = element.attr('href'); 86 | var form = $('<form method="post" action="'+url+'"></form>'); 87 | var csrf_token = $('meta[name=csrf-token]').attr('content'); 88 | var csrf_param = $('meta[name=csrf-param]').attr('content'); 89 | form.hide().appendTo('body'); 90 | if (verb !== 'post') { 91 | var field = '<input type="hidden" name="_method" value="'+verb+'" />'; 92 | form.append(field); 93 | } 94 | if (csrf_param !== undefined && csrf_token !== undefined) { 95 | var field = '<input type="hidden" name="'+csrf_param+'" value="'+csrf_token+'" />'; 96 | form.append(field); 97 | } 98 | form.submit(); 99 | } 100 | }; 101 | 102 | // Every xhr request is sent along with the CSRF token. 103 | $.ajaxPrefilter(function(options, originalOptions, xhr) { 104 | if (options.verb !== 'GET') { 105 | var token = $('meta[name="csrf-token"]').attr('content'); 106 | if (token) xhr.setRequestHeader('X-CSRF-Token', token); 107 | } 108 | }); 109 | }); 110 | -------------------------------------------------------------------------------- /sample-data/data.yaml: -------------------------------------------------------------------------------- 1 | version: cities100-2010 2 | # cities100.txt 3 | # National Places Gazetteer Files, from US Census 2010 4 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 5 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 6 | # head -n 101 results.txt > cities100.txt 7 | # then converted to csv and removed " city" from after each city name 8 | index: city-data 9 | api: cities 10 | unique: ["name"] 11 | 12 | options: 13 | search: dictionary_only # API provides error when requesting fields not in dictionary 14 | 15 | dictionary: 16 | id: 17 | source: GEOID 18 | description: > 19 | Geographic Identifier - fully concatenated geographic code (State FIPS and 20 | County FIPS). The Census Bureau and other state and federal agencies are 21 | responsible for assigning geographic identifiers, or GEOIDs, to geographic 22 | entities to facilitate the organization, presentation, and 23 | exchange of geographic and statistical data. GEOIDs are numeric codes that 24 | uniquely identify all administrative/legal and statistical geographic areas for 25 | which the Census Bureau tabulates data. From Alaska, the largest state, 26 | to the smallest census block in New York City, every geographic area 27 | has a unique GEOID. Data users rely on GEOIDs to join the appropriate 28 | demographic data from censuses and surveys, such as the 29 | American Community Survey (ACS), to various levels of geography for data 30 | analysis, interpretation and mapping. 31 | code: 32 | source: ANSICODE 33 | description: > 34 | American National Standards Institute codes (ANSI codes) 35 | are standardized numeric or alphabetic codes issued by the American 36 | National Standards Institute (ANSI) to ensure uniform identification of 37 | geographic entities through all federal government agencies. 38 | name: 39 | source: NAME 40 | description: The name of the city 41 | type: literal 42 | state: 43 | source: USPS 44 | description: Two letter state abbreviation 45 | population: 46 | source: POP10 47 | description: City population from 2010 Census data 48 | type: integer 49 | location.lat: INTPTLAT 50 | location.lon: INTPTLONG 51 | land_area: 52 | source: ALAND_SQMI 53 | description: Land Area (square miles) 54 | type: float 55 | area.water: 56 | description: Water Area (square miles) 57 | source: AWATER_SQMI 58 | type: float 59 | 60 | categories: 61 | general: 62 | title: General 63 | description: > 64 | general information about the city, including standard 65 | identifiers and actual census summary data about the population of the city.
66 | fields: [id, code, name, state, population] 67 | geographic: 68 | title: Geographic 69 | description: > 70 | Geographic characteristics of the area. These are created for 71 | statistical purposes only. Depiction and designation for statistical 72 | purposes does not constitute a determination of jurisdictional authority 73 | or rights of ownership or entitlement. 74 | fields: [location, area.land, area.water] 75 | general2: 76 | title: General2 77 | description: > 78 | general information about the city, including standard 79 | identifiers and actual census summary data about the population of the city. 80 | fields: [id, code, name, state, population] 81 | general3: 82 | title: General3 83 | description: > 84 | short 85 | fields: [id, code, name, state, population] 86 | general4: 87 | title: General4 88 | description: > 89 | short 90 | fields: [id, code, name, state, population] 91 | general5: 92 | title: General5 93 | description: > 94 | general information about the city, including standard 95 | identifiers and actual census summary data about the population of the city. 96 | fields: [id, code, name, state, population] 97 | 98 | files: 99 | - name: cities100.csv 100 | -------------------------------------------------------------------------------- /script/bomstrip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | mkdir -p new 3 | 4 | for filename in ./*.csv; do 5 | awk 'NR==1{sub(/^\xef\xbb\xbf/,"")}1' "$filename" > new/$filename 6 | done 7 | 8 | #find . -print0 -type f | awk 'NR==1{sub(/^\xef\xbb\xbf/,"")}1' {} > new/{} 9 | -------------------------------------------------------------------------------- /script/bootstrap: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | fancy_echo() { 6 | local fmt="$1"; shift 7 | 8 | # shellcheck disable=SC2059 9 | printf "\n$fmt\n" "$@" 10 | } 11 | 12 | brew_install_or_upgrade() { 13 | if brew_is_installed "$1"; then 14 | if brew_is_upgradable "$1"; then 15 | fancy_echo "Upgrading %s ..." "$1" 16 | brew upgrade "$@" 17 | else 18 | fancy_echo "Already using the latest version of %s. Skipping ..." "$1" 19 | fi 20 | else 21 | fancy_echo "Installing %s ..." "$1" 22 | brew install "$@" 23 | fi 24 | } 25 | 26 | brew_is_installed() { 27 | brew list -1 | grep -Fqx "$1" 28 | } 29 | 30 | brew_is_upgradable() { 31 | ! brew outdated --quiet "$1" >/dev/null 32 | } 33 | 34 | brew_tap_is_installed() { 35 | brew tap | grep -Fqx "$1" 36 | } 37 | 38 | brew_tap() { 39 | if ! brew_tap_is_installed "$1"; then 40 | fancy_echo "Tapping $1..." 41 | brew tap "$1" 2> /dev/null 42 | fi 43 | } 44 | 45 | echo 'Installing dependencies...' 46 | 47 | if command -v brew >/dev/null; then 48 | brew update 49 | 50 | brew_tap 'homebrew/services' 51 | brew_tap 'homebrew/versions' 52 | brew_install_or_upgrade 'elasticsearch17' 53 | 54 | brew services restart elasticsearch17 55 | 56 | # elasticsearch takes several seconds to load 57 | sleep 10 58 | fi 59 | 60 | gem install bundler --conservative 61 | bundle check || bundle install 62 | 63 | echo "All done!" 
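# usage: script/bootstrap
# installs or upgrades elasticsearch17 via Homebrew, restarts it, waits ~10s for it to boot, then installs the bundled gems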
64 | -------------------------------------------------------------------------------- /script/makeutf8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # convert to utf8 and strip Byte Order Mark (BOM) if present 3 | mkdir -p utf8 4 | 5 | for file in *.csv; do 6 | echo "$file" 7 | iconv -f ascii -t utf-8 "$file" | awk 'NR==1{sub(/^\xef\xbb\xbf/,"")}1' > "./utf8/$file" 8 | done 9 | -------------------------------------------------------------------------------- /script/s3config.rb: -------------------------------------------------------------------------------- 1 | # configure S3 with local credentials based on environment 2 | # usage (from ruby script or irb): 3 | # require 's3config.rb' 4 | # @s3 = ::Aws::S3::Client.new 5 | 6 | require 'dotenv' 7 | 8 | branch = `echo $(git symbolic-ref --short HEAD)`.chomp 9 | 10 | if ENV['APP_ENV'] 11 | APP_ENV = ENV['APP_ENV'] 12 | puts "using APP_ENV from environment #{APP_ENV}" 13 | else 14 | case branch 15 | when "master" 16 | APP_ENV = "production" 17 | when "staging" 18 | APP_ENV = "staging" 19 | else 20 | puts "not on master or staging branch lets use dev" 21 | APP_ENV = "dev" 22 | end 23 | end 24 | 25 | Dotenv.load( 26 | File.expand_path("../../.#{APP_ENV}.env", __FILE__), 27 | File.expand_path("../../.env", __FILE__)) 28 | 29 | require 'aws-sdk' 30 | puts "app env: #{APP_ENV}" 31 | puts "bucket name: #{ENV['s3_bucket']}" 32 | 33 | 34 | s3cred = {'access_key'=> ENV['s3_access_key'], 'secret_key' => ENV['s3_secret_key']} 35 | 36 | ::Aws.config[:credentials] = ::Aws::Credentials.new(s3cred['access_key'], s3cred['secret_key']) 37 | ::Aws.config[:region] = 'us-east-1' 38 | -------------------------------------------------------------------------------- /script/s3pull: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby -v 2 | 3 | require_relative 's3config.rb' 4 | 5 | @s3 = ::Aws::S3::Client.new 6 | 7 | bucket = ENV['s3_bucket'] 8 | 9 | dirname = 'real-data' 10 | unless File.directory?(dirname) 11 | FileUtils.mkdir_p(dirname) 12 | end 13 | datayamlpath = File.expand_path("../../#{dirname}/#{bucket}.yaml", __FILE__) 14 | 15 | File.open(datayamlpath, 'w') do |file| 16 | response = @s3.get_object(bucket: bucket, key: 'data.yaml') 17 | file << response.body.read 18 | end 19 | -------------------------------------------------------------------------------- /script/s3push: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby -v 2 | 3 | require_relative 's3config.rb' 4 | 5 | @s3 = ::Aws::S3::Client.new 6 | 7 | dirname = 'real-data' 8 | bucket_name = ENV['s3_bucket'] 9 | datayamlpath = File.expand_path("../../#{dirname}/#{bucket_name}.yaml", __FILE__) 10 | 11 | puts "copying #{datayamlpath}" 12 | puts "to S3 #{bucket_name}" 13 | File.open(datayamlpath, 'r') do |file| 14 | @s3.put_object(bucket: bucket_name, key: 'data.yaml', body: file) 15 | end 16 | -------------------------------------------------------------------------------- /spec/features/web_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'app', type: 'feature' do 4 | before do 5 | DataMagic.destroy 6 | ENV['DATA_PATH'] = './spec/fixtures/sample-data' 7 | DataMagic.init(load_now: true) 8 | end 9 | 10 | after do 11 | DataMagic.destroy 12 | end 13 | 14 | it "should load the home page" do 15 | get '/' 16 | expect(last_response).to be_ok 17 | end 18 | 19 | it
"should display links to endpoints" do 20 | get '/' 21 | expect(last_response.body).to include 'cities' 22 | end 23 | 24 | it "should display a list of categories" do 25 | get '/' 26 | expect(last_response.body).to include('Browse Data Details by Category') 27 | expect(last_response.body).to include('General') # category name 28 | expect(last_response.body).to include('general information about the city, including standard identifiers') 29 | end 30 | 31 | it "should load the correct category page" do 32 | get '/category/general' 33 | expect(last_response.body).to include('Data Details for the') 34 | expect(last_response.body).to include('category_entry = {"title":"General"') 35 | expect(last_response.body).to include('population') # a field name 36 | expect(last_response.body).to include('The name of the city') # a field description 37 | expect(last_response.body).to include('literal') # field type 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /spec/fixtures/bom/bom.csv: -------------------------------------------------------------------------------- 1 | UNITID,VAL 2 | 100654,00100200 3 | 100663,00105200 4 | 100690,02503400 5 | 100706,00105500 6 | 100724,00100500 7 | -------------------------------------------------------------------------------- /spec/fixtures/bom/data.yaml: -------------------------------------------------------------------------------- 1 | version: byte-order-mark 2 | index: test-data 3 | api: test 4 | dictionary: 5 | id: UNITID 6 | value: VAL 7 | 8 | files: 9 | - name: bom.csv 10 | -------------------------------------------------------------------------------- /spec/fixtures/calculated_columns/data.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: Aug6-2015-08-10-23:48-0600 3 | api: fakeschool 4 | index: fakeschool-data 5 | unique: 6 | - id 7 | options: 8 | limit_files: 1 9 | limit_rows: 100 10 | 11 | dictionary: 12 | id: 13 | source: UNITID 14 | type: integer 15 | description: Unit ID for institution 16 | school.name: 17 | source: INSTNM 18 | description: Institution name 19 | integer1: 20 | source: INT1 21 | type: integer 22 | integer2: 23 | source: INT2 24 | type: integer 25 | integer3: 26 | source: INT3 27 | type: integer 28 | integer4: 29 | source: INT4 30 | type: integer 31 | summarybool: 32 | calculate: INT1 or INT2 or INT3 or INT4 33 | type: boolean 34 | description: are any of the unparsed booleans true? 
35 | 36 | files: 37 | - name: schools.csv 38 | -------------------------------------------------------------------------------- /spec/fixtures/calculated_columns/schools.csv: -------------------------------------------------------------------------------- 1 | UNITID,INSTNM,INT1,INT2,INT3,INT4 2 | 1,Big School,0,0,2,0 3 | 2,Small School,0,0,0,0 4 | 3,Middle School,0,1,1,0 5 | -------------------------------------------------------------------------------- /spec/fixtures/cities_with_yml/cities51-100.csv: -------------------------------------------------------------------------------- 1 | "USPS","GEOID","ANSICODE","NAME","LSAD","FUNCSTAT","POP10","HU10","ALAND","AWATER","ALAND_SQMI","AWATER_SQMI","INTPTLAT","INTPTLONG" 2 | "TX","4804000","02409731","Arlington","25","A","365438","144805","248332497","9690024","95.882","3.741","32.700708","-97.124691" 3 | "CA","0603526","02409774","Bakersfield","25","A","347483","120725","368204317","3741691","142.164","1.445","35.321213","-119.018291" 4 | "LA","2255000","00545142","New Orleans","25","A","343829","189896","438803381","468240430","169.423","180.789","30.068636","-89.939007" 5 | "HI","1571550","02630783","Urban Honolulu CDP","57","S","337256","143173","156748036","20484151","60.521","7.909","21.325852","-157.845315" 6 | "CA","0602000","02409704","Anaheim","25","A","336265","104237","129073275","2526668","49.835","0.976","33.855497","-117.760071" 7 | "FL","1271000","02405568","Tampa","25","A","335709","157130","293727878","160127838","113.409","61.826","27.970086","-82.479673" 8 | "CO","0804000","02409757","Aurora","25","A","325078","131040","400759192","1806832","154.734","0.698","39.688002","-104.689740" 9 | "CA","0669000","02411814","Santa Ana","25","A","324528","76896","70627761","643479","27.270","0.248","33.736478","-117.882593" 10 | "MO","2965000","00767557","St. Louis","25","A","319294","176002","160343174","10683076","61.909","4.125","38.635699","-90.244582" 11 | "PA","4261000","01214818","Pittsburgh","25","A","305704","156165","143399923","7693613","55.367","2.971","40.439753","-79.976592" 12 | "TX","4817000","02410234","Corpus Christi","25","A","305215","125469","415982136","852055055","160.612","328.980","27.754252","-97.173385" 13 | "CA","0662000","02410965","Riverside","25","A","303871","98444","210152356","788400","81.140","0.304","33.938143","-117.393168" 14 | "OH","3915000","01086201","Cincinnati","25","A","296943","161095","201869928","4155439","77.942","1.604","39.139902","-84.506446" 15 | "KY","2146027","02405089","Lexington-Fayette urban county","UC","A","295803","135160","734648526","4922803","283.649","1.901","38.040157","-84.458443" 16 | "AK","0203000","02419025","Anchorage municipality","37","A","291826","113032","4415108963","663860984","1704.683","256.318","61.177549","-149.274354" 17 | "CA","0675000","02411987","Stockton","25","A","291707","99637","159723404","7984682","61.670","3.083","37.976342","-121.313304" 18 | "OH","3977000","01086537","Toledo","25","A","287208","138039","208991246","8889079","80.692","3.432","41.664071","-83.581861" 19 | "MN","2758000","02396511","St. 
Paul","25","A","285068","120795","134623737","10875208","51.979","4.199","44.948869","-93.103855" 20 | "NJ","3451000","00885317","Newark","25","A","277140","109520","62643850","4972876","24.187","1.920","40.724220","-74.172574" 21 | "NC","3728000","02403745","Greensboro","25","A","269666","124074","327673360","13690607","126.515","5.286","36.096483","-79.827108" 22 | "NY","3611000","00978764","Buffalo","25","A","261310","133444","104594197","31364094","40.384","12.110","42.892492","-78.859686" 23 | "TX","4858016","02411437","Plano","25","A","259841","103672","185394655","937663","71.581","0.362","33.050769","-96.747944" 24 | "NE","3128000","02395713","Lincoln","25","A","258379","110546","230804010","3229386","89.114","1.247","40.808957","-96.680354" 25 | "NV","3231900","02410741","Henderson","25","A","257729","113586","279023542","0","107.732","0.000","36.012233","-115.037462" 26 | "IN","1825000","02394798","Fort Wayne","25","A","253691","113541","286500436","553423","110.618","0.214","41.088173","-85.143880" 27 | "NJ","3436000","00885264","Jersey","25","A","247597","108720","38315542","16280557","14.794","6.286","40.711417","-74.064760" 28 | "FL","1263000","02405401","St. Petersburg","25","A","244769","129401","159909751","196473878","61.742","75.859","27.761976","-82.644055" 29 | "CA","0613392","02409461","Chula Vista","25","A","243916","79416","128544675","6380068","49.631","2.463","32.627670","-117.015170" 30 | "VA","5157000","01498557","Norfolk","25","A","242803","95018","140171293","109376999","54.120","42.231","36.923015","-76.244641" 31 | "FL","1253000","02404443","Orlando","25","A","238300","121254","265203107","21469603","102.395","8.289","28.415886","-81.298750" 32 | "AZ","0412000","02409433","Chandler","25","A","236123","94404","166828220","289715","64.413","0.112","33.282874","-111.854943" 33 | "TX","4841464","02411626","Laredo","25","A","236091","68610","230271380","3754983","88.908","1.450","27.547681","-99.486931" 34 | "WI","5548000","01583625","Madison","25","A","233209","108843","198882058","44658619","76.789","17.243","43.087806","-89.430121" 35 | "NC","3775000","02405771","Winston-Salem","25","A","229617","103974","343041264","3228612","132.449","1.247","36.103262","-80.260578" 36 | "TX","4845000","02410892","Lubbock","25","A","229573","95926","317041399","2962034","122.410","1.144","33.566479","-101.886677" 37 | "LA","2205000","02403821","Baton Rouge","25","B","229493","100801","199291656","5588234","76.947","2.158","30.448454","-91.125899" 38 | "NC","3719000","02403521","Durham","25","A","228330","103221","278087581","2357401","107.370","0.910","35.980964","-78.905647" 39 | "TX","4829000","02410572","Garland","25","A","226876","80834","147848881","340126","57.085","0.131","32.909826","-96.630357" 40 | "AZ","0427820","02410596","Glendale","25","A","226721","90505","155337275","401624","59.976","0.155","33.533111","-112.189901" 41 | "NV","3260600","02410923","Reno","25","A","225221","102582","266792840","7423507","103.009","2.866","39.474487","-119.776538" 42 | "FL","1230000","02404689","Hialeah","25","A","224669","74067","55554697","3599730","21.450","1.390","25.869941","-80.302865" 43 | "NV","3254600","02409023","Paradise CDP","57","S","223167","114296","120996826","0","46.717","0.000","36.080689","-115.136839" 44 | "VA","5116000","01498558","Chesapeake","25","A","222209","83196","882669156","26052854","340.800","10.059","36.679376","-76.301788" 45 | 
"AZ","0465000","02411845","Scottsdale","25","A","217385","124001","476350341","1231086","183.920","0.475","33.668727","-111.823682" 46 | "NV","3251800","02411273","North Las Vegas","25","A","216961","76073","262483131","112001","101.345","0.043","36.282974","-115.089262" 47 | "TX","4837000","02410117","Irving","25","A","216290","91128","173573892","2594600","67.017","1.002","32.857748","-96.970022" 48 | "CA","0626000","02410545","Fremont","25","A","214089","73989","200617968","26291598","77.459","10.151","37.494373","-121.941117" 49 | "CA","0636770","02410116","Irvine","25","A","212375","83899","171214072","900908","66.106","0.348","33.678399","-117.771254" 50 | "AL","0107000","02403868","Birmingham","25","A","212237","108981","378310927","6590665","146.067","2.545","33.527444","-86.799047" 51 | "NY","3663000","00979426","Rochester","25","A","210565","97158","92671789","3558427","35.781","1.374","43.169927","-77.616891" 52 | -------------------------------------------------------------------------------- /spec/fixtures/cities_with_yml/data.yml: -------------------------------------------------------------------------------- 1 | # cities100.txt 2 | # National Places Gazetteer Files, from US Census 2010 3 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 4 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 5 | # head -n 101 results.txt > cities100.txt 6 | # then convertes to csv and removed " city" from after each city name 7 | version: fixture-import-all 8 | index: city-data 9 | api: cities 10 | global_mapping: 11 | USPS: state 12 | NAME: name 13 | POP10: population 14 | INTPTLAT: latitude 15 | INTPTLONG: longitude 16 | 17 | files: 18 | - name: cities50.csv 19 | add: 20 | category: 'top50' 21 | - name: cities51-100.csv 22 | -------------------------------------------------------------------------------- /spec/fixtures/cities_with_yml/more.csv: -------------------------------------------------------------------------------- 1 | state,city,lat,lon 2 | CA,Secret City,37.727239,-123.032229 3 | -------------------------------------------------------------------------------- /spec/fixtures/cities_without_yml/cities51-100.csv: -------------------------------------------------------------------------------- 1 | "USPS","GEOID","ANSICODE","NAME","LSAD","FUNCSTAT","POP10","HU10","ALAND","AWATER","ALAND_SQMI","AWATER_SQMI","INTPTLAT","INTPTLONG" 2 | "TX","4804000","02409731","Arlington","25","A","365438","144805","248332497","9690024","95.882","3.741","32.700708","-97.124691" 3 | "CA","0603526","02409774","Bakersfield","25","A","347483","120725","368204317","3741691","142.164","1.445","35.321213","-119.018291" 4 | "LA","2255000","00545142","New Orleans","25","A","343829","189896","438803381","468240430","169.423","180.789","30.068636","-89.939007" 5 | "HI","1571550","02630783","Urban Honolulu CDP","57","S","337256","143173","156748036","20484151","60.521","7.909","21.325852","-157.845315" 6 | "CA","0602000","02409704","Anaheim","25","A","336265","104237","129073275","2526668","49.835","0.976","33.855497","-117.760071" 7 | "FL","1271000","02405568","Tampa","25","A","335709","157130","293727878","160127838","113.409","61.826","27.970086","-82.479673" 8 | "CO","0804000","02409757","Aurora","25","A","325078","131040","400759192","1806832","154.734","0.698","39.688002","-104.689740" 9 | "CA","0669000","02411814","Santa Ana","25","A","324528","76896","70627761","643479","27.270","0.248","33.736478","-117.882593" 10 | 
"MO","2965000","00767557","St. Louis","25","A","319294","176002","160343174","10683076","61.909","4.125","38.635699","-90.244582" 11 | "PA","4261000","01214818","Pittsburgh","25","A","305704","156165","143399923","7693613","55.367","2.971","40.439753","-79.976592" 12 | "TX","4817000","02410234","Corpus Christi","25","A","305215","125469","415982136","852055055","160.612","328.980","27.754252","-97.173385" 13 | "CA","0662000","02410965","Riverside","25","A","303871","98444","210152356","788400","81.140","0.304","33.938143","-117.393168" 14 | "OH","3915000","01086201","Cincinnati","25","A","296943","161095","201869928","4155439","77.942","1.604","39.139902","-84.506446" 15 | "KY","2146027","02405089","Lexington-Fayette urban county","UC","A","295803","135160","734648526","4922803","283.649","1.901","38.040157","-84.458443" 16 | "AK","0203000","02419025","Anchorage municipality","37","A","291826","113032","4415108963","663860984","1704.683","256.318","61.177549","-149.274354" 17 | "CA","0675000","02411987","Stockton","25","A","291707","99637","159723404","7984682","61.670","3.083","37.976342","-121.313304" 18 | "OH","3977000","01086537","Toledo","25","A","287208","138039","208991246","8889079","80.692","3.432","41.664071","-83.581861" 19 | "MN","2758000","02396511","St. Paul","25","A","285068","120795","134623737","10875208","51.979","4.199","44.948869","-93.103855" 20 | "NJ","3451000","00885317","Newark","25","A","277140","109520","62643850","4972876","24.187","1.920","40.724220","-74.172574" 21 | "NC","3728000","02403745","Greensboro","25","A","269666","124074","327673360","13690607","126.515","5.286","36.096483","-79.827108" 22 | "NY","3611000","00978764","Buffalo","25","A","261310","133444","104594197","31364094","40.384","12.110","42.892492","-78.859686" 23 | "TX","4858016","02411437","Plano","25","A","259841","103672","185394655","937663","71.581","0.362","33.050769","-96.747944" 24 | "NE","3128000","02395713","Lincoln","25","A","258379","110546","230804010","3229386","89.114","1.247","40.808957","-96.680354" 25 | "NV","3231900","02410741","Henderson","25","A","257729","113586","279023542","0","107.732","0.000","36.012233","-115.037462" 26 | "IN","1825000","02394798","Fort Wayne","25","A","253691","113541","286500436","553423","110.618","0.214","41.088173","-85.143880" 27 | "NJ","3436000","00885264","Jersey","25","A","247597","108720","38315542","16280557","14.794","6.286","40.711417","-74.064760" 28 | "FL","1263000","02405401","St. 
Petersburg","25","A","244769","129401","159909751","196473878","61.742","75.859","27.761976","-82.644055" 29 | "CA","0613392","02409461","Chula Vista","25","A","243916","79416","128544675","6380068","49.631","2.463","32.627670","-117.015170" 30 | "VA","5157000","01498557","Norfolk","25","A","242803","95018","140171293","109376999","54.120","42.231","36.923015","-76.244641" 31 | "FL","1253000","02404443","Orlando","25","A","238300","121254","265203107","21469603","102.395","8.289","28.415886","-81.298750" 32 | "AZ","0412000","02409433","Chandler","25","A","236123","94404","166828220","289715","64.413","0.112","33.282874","-111.854943" 33 | "TX","4841464","02411626","Laredo","25","A","236091","68610","230271380","3754983","88.908","1.450","27.547681","-99.486931" 34 | "WI","5548000","01583625","Madison","25","A","233209","108843","198882058","44658619","76.789","17.243","43.087806","-89.430121" 35 | "NC","3775000","02405771","Winston-Salem","25","A","229617","103974","343041264","3228612","132.449","1.247","36.103262","-80.260578" 36 | "TX","4845000","02410892","Lubbock","25","A","229573","95926","317041399","2962034","122.410","1.144","33.566479","-101.886677" 37 | "LA","2205000","02403821","Baton Rouge","25","B","229493","100801","199291656","5588234","76.947","2.158","30.448454","-91.125899" 38 | "NC","3719000","02403521","Durham","25","A","228330","103221","278087581","2357401","107.370","0.910","35.980964","-78.905647" 39 | "TX","4829000","02410572","Garland","25","A","226876","80834","147848881","340126","57.085","0.131","32.909826","-96.630357" 40 | "AZ","0427820","02410596","Glendale","25","A","226721","90505","155337275","401624","59.976","0.155","33.533111","-112.189901" 41 | "NV","3260600","02410923","Reno","25","A","225221","102582","266792840","7423507","103.009","2.866","39.474487","-119.776538" 42 | "FL","1230000","02404689","Hialeah","25","A","224669","74067","55554697","3599730","21.450","1.390","25.869941","-80.302865" 43 | "NV","3254600","02409023","Paradise CDP","57","S","223167","114296","120996826","0","46.717","0.000","36.080689","-115.136839" 44 | "VA","5116000","01498558","Chesapeake","25","A","222209","83196","882669156","26052854","340.800","10.059","36.679376","-76.301788" 45 | "AZ","0465000","02411845","Scottsdale","25","A","217385","124001","476350341","1231086","183.920","0.475","33.668727","-111.823682" 46 | "NV","3251800","02411273","North Las Vegas","25","A","216961","76073","262483131","112001","101.345","0.043","36.282974","-115.089262" 47 | "TX","4837000","02410117","Irving","25","A","216290","91128","173573892","2594600","67.017","1.002","32.857748","-96.970022" 48 | "CA","0626000","02410545","Fremont","25","A","214089","73989","200617968","26291598","77.459","10.151","37.494373","-121.941117" 49 | "CA","0636770","02410116","Irvine","25","A","212375","83899","171214072","900908","66.106","0.348","33.678399","-117.771254" 50 | "AL","0107000","02403868","Birmingham","25","A","212237","108981","378310927","6590665","146.067","2.545","33.527444","-86.799047" 51 | "NY","3663000","00979426","Rochester","25","A","210565","97158","92671789","3558427","35.781","1.374","43.169927","-77.616891" 52 | -------------------------------------------------------------------------------- /spec/fixtures/cities_without_yml/more.csv: -------------------------------------------------------------------------------- 1 | state,city,lat,lon 2 | CA,Secret City,37.727239,-123.032229 3 | -------------------------------------------------------------------------------- 
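The fixture directories above are exercised the same way throughout the specs; a minimal sketch following the pattern in spec/features/web_spec.rb (only the path differs):

DataMagic.destroy
ENV['DATA_PATH'] = './spec/fixtures/cities_without_yml'
DataMagic.init(load_now: true) # cities_without_yml has no data.yaml; see spec/lib/data_magic/import_without_data_yaml_spec.rb for the expected defaults
DataMagic.destroy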
/spec/fixtures/data.rb: -------------------------------------------------------------------------------- 1 | # Ages adjusted for Springfield residents to average to 42 2 | # Heights randomly set to generate a max of 142 3 | def address_data 4 | @address_data ||= StringIO.new <<-eos 5 | name,address,city,age,height 6 | Paul,15 Penny Lane,Liverpool,10,142 7 | Michelle,600 Pennsylvania Avenue,Washington,12,1 8 | Marilyn,1313 Mockingbird Lane,Springfield,14,2 9 | Sherlock,221B Baker Street,London,16,123 10 | Clark,66 Lois Lane,Smallville,18,141 11 | Bart,742 Evergreen Terrace,Springfield,70,142 12 | Paul,19 N Square,Boston,70,55.2 13 | Peter,66 Parker Lane,New York,74,11.5123 14 | eos 15 | @address_data.rewind 16 | @address_data 17 | end 18 | 19 | def geo_data 20 | @geo_data ||= StringIO.new <<-eos 21 | state,city,lat,lon 22 | CA,San Francisco,37.727239,-123.032229 23 | NY,"New York",40.664274,-73.938500 24 | CA,"Los Angeles",34.019394,-118.410825 25 | IL,Chicago,41.837551,-87.681844 26 | TX,Houston,29.780472,-95.386342 27 | PA,Philadelphia,40.009376,-75.133346 28 | CA,"San Jose",37.296867,-121.819306 29 | MA,Boston,42.331960,-71.020173 30 | WA,Seattle,47.620499,-122.350876 31 | eos 32 | @geo_data.rewind 33 | @geo_data 34 | end 35 | -------------------------------------------------------------------------------- /spec/fixtures/geo/data.yaml: -------------------------------------------------------------------------------- 1 | 2 | 3 | # cities100.txt 4 | # National Places Gazetteer Files, from US Census 2010 5 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 6 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 7 | # head -n 101 results.txt > cities100.txt 8 | # then converted to csv and removed " city" from after each city name 9 | dictionary: 10 | city: city 11 | location.lat: lat 12 | location.lon: lon 13 | 14 | index: place-data 15 | api: places 16 | files: 17 | - name: places.csv 18 | -------------------------------------------------------------------------------- /spec/fixtures/geo/places.csv: -------------------------------------------------------------------------------- 1 | state,city,lat,lon 2 | CA,"San Francisco",37.727239,-123.032229 3 | NY,"New York",40.664274,-73.938500 4 | CA,"Los Angeles",34.019394,-118.410825 5 | IL,Chicago,41.837551,-87.681844 6 | TX,Houston,29.780472,-95.386342 7 | PA,Philadelphia,40.009376,-75.133346 8 | CA,"San Jose",37.296867,-121.819306 9 | MA,Boston,42.331960,-71.020173 10 | WA,Seattle,47.620499,-122.350876 11 | -------------------------------------------------------------------------------- /spec/fixtures/geo_no_files/data.yaml: -------------------------------------------------------------------------------- 1 | # data.yaml for geo tests 2 | index: place-data 3 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_dictionary/data.yaml: -------------------------------------------------------------------------------- 1 | # cities100.txt 2 | # National Places Gazetteer Files, from US Census 2010 3 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 4 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 5 | # head -n 101 results.txt > cities100.txt 6 | # then converted to csv and removed " city" from after each city name 7 | version: fixture-import-all 8 | index: city-data 9 | api: cities 10 | dictionary: 11 | state: USPS 12 | name: NAME 13 | population: POP10 14 |
latitude: INTPTLAT 15 | longitude: INTPTLONG 16 | 17 | files: 18 | - name: cities50.csv 19 | add: 20 | category: 'top50' 21 | - name: cities51-100.csv 22 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_dictionary/more.csv: -------------------------------------------------------------------------------- 1 | state,city,lat,lon 2 | CA,Secret City,37.727239,-123.032229 3 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_errors/cities4.csv: -------------------------------------------------------------------------------- 1 | USPS,GEOID,ANSICODE,NAME,POP10 2 | NY,3651000,2395220,New York,8175133 3 | CA,644000,2410877,Los Angeles,3792621 4 | IL,1714000,428803,Chicago,2695598 5 | TX,4835000,2410796,Houston,2099451 -------------------------------------------------------------------------------- /spec/fixtures/import_with_errors/data.yaml: -------------------------------------------------------------------------------- 1 | version: fixture-type-error 2 | index: expect-errors 3 | api: nothing 4 | 5 | dictionary: 6 | state: USPS 7 | name: NAME 8 | population: 9 | source: POP10 10 | type: broken 11 | 12 | files: 13 | - name: cities4.csv 14 | add: 15 | year: 2010 16 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_null_value/data.yaml: -------------------------------------------------------------------------------- 1 | index: city-data 2 | api: cities 3 | unique: ['name'] 4 | null_value: 'abc123' 5 | options: 6 | columns: all 7 | 8 | dictionary: 9 | state: USPS 10 | population: POP10 11 | name: NAME 12 | 13 | files: 14 | - name: null_values.csv 15 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_null_value/null_values.csv: -------------------------------------------------------------------------------- 1 | USPS,GEOID,ANSICODE,NAME,POP10 2 | NY,abc123,2395220,New York,8175133 3 | CA,644000,2410877,Los Angeles,3792621 4 | IL,1714000,428803,Chicago,2695598 5 | TX,4835000,2410796,Houston,2099451 6 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_options/cities4.csv: -------------------------------------------------------------------------------- 1 | USPS,GEOID,ANSICODE,NAME,POP10 2 | NY,3651000,2395220,New York,8175133 3 | CA,644000,2410877,Los Angeles,3792621 4 | IL,1714000,428803,Chicago,2695598 5 | TX,4835000,2410796,Houston,2099451 -------------------------------------------------------------------------------- /spec/fixtures/import_with_options/data.yaml: -------------------------------------------------------------------------------- 1 | version: fixture-import-options 2 | index: city-data 3 | api: cities 4 | options: 5 | columns: all 6 | limit_files: 1 7 | limit_rows: 3 8 | 9 | dictionary: 10 | state: USPS 11 | name: NAME 12 | population: POP10 13 | 14 | files: 15 | - name: cities4.csv 16 | add: 17 | year: 2010 18 | - name: more_cities.csv # this shouldn't get imported 19 | add: 20 | year: 1000 21 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_options/more_cities.csv: -------------------------------------------------------------------------------- 1 | USPS,GEOID,ANSICODE,NAME,POP10 2 | XX,0,0,YY,0 3 | -------------------------------------------------------------------------------- /spec/fixtures/invalid_utf8.csv: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/spec/fixtures/invalid_utf8.csv -------------------------------------------------------------------------------- /spec/fixtures/minimal/data.yaml: -------------------------------------------------------------------------------- 1 | # smallest possible data.yaml 2 | index: my-index 3 | -------------------------------------------------------------------------------- /spec/fixtures/nested2/data.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: Aug6-2015-08-10-23:48-0600 3 | api: fakeschool 4 | index: fakeschool-data 5 | unique: 6 | - id 7 | options: 8 | # columns: all 9 | limit_files: 1 10 | limit_rows: 100 11 | search: dictionary_only 12 | 13 | dictionary: 14 | id: 15 | source: UNITID 16 | type: integer 17 | description: Unit ID for institution 18 | ope8_id: 19 | source: OPEID 20 | type: integer 21 | description: 8-digit OPE ID for institution 22 | ope6_id: 23 | source: opeid6 24 | type: integer 25 | description: 6-digit OPE ID for institution 26 | school.name: 27 | source: INSTNM 28 | type: literal 29 | description: Institution name 30 | school.city: 31 | source: CITY_MAIN 32 | description: City 33 | school.state: 34 | source: STABBR_MAIN 35 | description: State postcode 36 | school.zip: 37 | source: ZIP_MAIN 38 | type: integer 39 | description: ZIP code 40 | 41 | files: 42 | - name: school2013.csv 43 | -------------------------------------------------------------------------------- /spec/fixtures/nested2/school2013.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59 12 | -------------------------------------------------------------------------------- /spec/fixtures/nested_data_type/data.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1.5.5 3 | api: schools 4 | index: school-data 5 | unique: 6 | - id 7 | options: 8 | limit_rows: 10 9 | 10 | dictionary: 11 | id: 12 | source: UNITID 13 | type: integer 14 | description: Unit ID for institution 15 | school.name: 16 | source: INSTNM 17 | type: autocomplete 18 | description: Institution name 19 | school.city: 20 | source: CITY 21 | type: autocomplete 22 | description: City 23 | school.degrees_awarded.predominant: 24 | source: PREDDEG 25 | type: integer 26 | description: |- 27 | Predominant 
undergraduate degree awarded 28 | 0 Not classified 29 | 1 Predominantly certificate-degree granting 30 | 2 Predominantly associate's-degree granting 31 | 3 Predominantly bachelor's-degree granting 32 | 4 Entirely graduate-degree granting 33 | programs.cip_4_digit.code: 34 | source: CIPCODE 35 | type: integer 36 | map: program 37 | description: Classification of Instructional Programs (CIP) code for the field of study 38 | programs.cip_4_digit.credential.level: 39 | source: CREDLEV 40 | type: integer 41 | map: program 42 | description: "Level of credential 43 | Credentials are categorized into the following levels: 44 | 1: Undergraduate Certificate or Diploma 45 | 2: Associate's Degree 46 | 3: Bachelor's Degree 47 | 4: Post-baccalaureate Certificate 48 | 5: Master's Degree 49 | 6: Doctoral Degree 50 | 7: First Professional Degree 51 | 8: Graduate/Professional Certificate" 52 | programs.cip_4_digit.earnings.median_earnings: 53 | source: P_MD_EARN_WNE 54 | map: program 55 | type: integer 56 | description: Median earnings of federally-aided completers in the earnings cohort 57 | 58 | shared_config: &shared_config 59 | contents: 60 | - earnings 61 | - completion 62 | - aid 63 | - repayment 64 | - student 65 | - cost 66 | - academics 67 | - admissions 68 | 69 | data_types: 70 | nested: 71 | - 2016.programs.cip_4_digit 72 | 73 | partial_map: 74 | program_debt: 75 | root: id 76 | path: programs.cip_4_digit 77 | link: programs.cip_4_digit.ope6_id 78 | 79 | files: 80 | - name: Most-Recent-Cohorts-All-Data-Elements.csv 81 | root: true 82 | only: 83 | - school 84 | nest: 85 | key: latest 86 | <<: *shared_config 87 | delta_only: 88 | - ope8_id 89 | - ope6_id 90 | - location 91 | - school 92 | - name: FieldOfStudyData1516_1617_PP.csv 93 | map: program_data 94 | nest: 95 | parent_missing: skip 96 | key: latest 97 | contents: 98 | - programs 99 | - name: MERGED2016_17_PP.csv 100 | nest: 101 | parent_missing: skip 102 | key: 2016 103 | <<: *shared_config -------------------------------------------------------------------------------- /spec/fixtures/nested_delta_files/data.yaml: -------------------------------------------------------------------------------- 1 | version: 1.5.5 2 | api: school 3 | index: fake-nested-delta 4 | unique: [id] 5 | 6 | dictionary: 7 | id: UNITID 8 | name: 9 | source: INSTNM 10 | type: literal 11 | city: CITY_MAIN 12 | state: STABBR_MAIN 13 | zipcode: ZIP_MAIN 14 | location.lat: LATITUDE_MAIN 15 | location.lon: LONGITUDE_MAIN 16 | under_investigation: 17 | source: HCM2 18 | type: integer 19 | description: under ED monitoring flag 20 | 21 | sat_average: SAT_AVG 22 | earnings.6_yrs_after_entry.median: 23 | source: earn_2002_p10 24 | description: Median earnings of students 25 | type: integer 26 | 27 | earnings.6_yrs_after_entry.percent_gt_25k: 28 | source: gt_25k_2006_p6 29 | description: Share of students earning over $25,000/year 30 | type: float 31 | 32 | files: 33 | - name: latest-school-data.csv 34 | only: [id, name, city, state, under_investigation] 35 | - name: latest-school-data.csv 36 | nest: 37 | key: latest 38 | contents: [earnings, sat_average] 39 | delta_only: [city, state, under_investigation] 40 | - name: school2013.csv 41 | nest: 42 | key: 2013 43 | contents: [earnings, sat_average] 44 | - name: school2012.csv 45 | nest: 46 | key: 2012 47 | contents: [earnings, sat_average] 48 | -------------------------------------------------------------------------------- /spec/fixtures/nested_delta_files/delta/latest-school-data_update1.csv:
-------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6,HCM2 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Alabama A & M University,1195,30000,0.53,1 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61,0 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50,1 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1900,0.1,0 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82,1 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06,0 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50,1 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59,0 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19,1 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59,0 12 | 11,Montgomery,NULL,1,36117,5,32.3643,-86.2957,Auburn University at Montgomery,940,49879,0.64,1 13 | -------------------------------------------------------------------------------- /spec/fixtures/nested_delta_files/latest-school-data.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6,HCM2 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53,0 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61,0 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50,0 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09,0 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82,0 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06,0 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50,0 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59,0 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19,0 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59,0 12 | 11,Montgomery,NULL,1,36117,5,32.3643,-86.2957,Auburn University at Montgomery,940,49879,0.64,0 13 | -------------------------------------------------------------------------------- /spec/fixtures/nested_delta_files/school2012.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6,HCM2 2 | 1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,461,35231,0.01,0 3 | 2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,986,34095,0.71,0 4 | 3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1094,42579,0.39,0 5 | 4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,854,37589,0.15,0 6 | 5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,650,13611,0.04,0 7 | 6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,797,36924,0.64,0 8 | 7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,994,31799,0.60,0 9 | 8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1420,30063,0.97,0 10 |
9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1292,42150,0.83,0 11 | 10,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,605,2608,0.92,0 12 | 11,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,2608,0.92,0 13 | -------------------------------------------------------------------------------- /spec/fixtures/nested_delta_files/school2013.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6,HCM2 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53,0 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61,0 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50,0 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09,0 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82,0 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06,0 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50,0 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59,0 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19,0 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59,0 12 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/data.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | api: school 3 | index: fake-nested 4 | unique: [id] 5 | 6 | dictionary: 7 | id: UNITID 8 | name: 9 | source: INSTNM 10 | type: literal 11 | city: CITY_MAIN 12 | state: STABBR_MAIN 13 | zipcode: ZIP_MAIN 14 | sat_average: SAT_AVG 15 | location.lat: LATITUDE_MAIN 16 | location.lon: LONGITUDE_MAIN 17 | 18 | earnings.6_yrs_after_entry.median: 19 | source: earn_2002_p10 20 | description: Median earnings of students 21 | type: integer 22 | 23 | earnings.6_yrs_after_entry.percent_gt_25k: 24 | source: gt_25k_2006_p6 25 | description: Share of students earning over $25,000/year 26 | type: float 27 | 28 | files: 29 | - name: school-data.csv 30 | only: [id, name, city, state] 31 | - name: school2013.csv 32 | nest: 33 | key: 2013 34 | contents: [earnings, sat_average] 35 | - name: school2012.csv 36 | nest: 37 | key: 2012 38 | contents: [earnings, sat_average] 39 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/school-data.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn 
University,486,44097,0.50 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59 12 | 11,Montgomery,NULL,1,36117,5,32.3643,-86.2957,Auburn University at Montgomery,940,49879,0.64 13 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/school2011.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1025,4048,0.92 3 | 2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,866,45556,0.34 4 | 3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,453,4675,0.71 5 | 4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,572,15466,0.34 6 | 5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1111,11266,0.86 7 | 6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,818,23357,0.58 8 | 7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1392,32584,0.39 9 | 8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,718,252,0.26 10 | 9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1297,36088,0.63 11 | 10,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,635,3259,0.70 12 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/school2012.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,461,35231,0.01 3 | 2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,986,34095,0.71 4 | 3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1094,42579,0.39 5 | 4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,854,37589,0.15 6 | 5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,650,13611,0.04 7 | 6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,797,36924,0.64 8 | 7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,994,31799,0.60 9 | 8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1420,30063,0.97 10 | 9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1292,42150,0.83 11 | 10,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,605,2608,0.92 12 | 11,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,2608,0.92 13 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/school2013.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19 11 | 
10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59 12 | -------------------------------------------------------------------------------- /spec/fixtures/numeric_data/data.yaml: -------------------------------------------------------------------------------- 1 | # cities100.txt 2 | # Test YAML file 3 | index: numeric-data 4 | api: cities 5 | 6 | dictionary: 7 | name: 8 | source: name 9 | type: string 10 | address: 11 | source: address 12 | type: string 13 | city: 14 | source: city 15 | type: string 16 | age: 17 | source: age 18 | type: integer 19 | height: 20 | source: height 21 | type: float 22 | -------------------------------------------------------------------------------- /spec/fixtures/sample-data/data.yaml: -------------------------------------------------------------------------------- 1 | version: cities100-2010 2 | # cities100.txt 3 | # National Places Gazetteer Files, from US Census 2010 4 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 5 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t') > result.txt 6 | # head -n 101 result.txt > cities100.txt 7 | # then converted to csv and removed " city" from after each city name 8 | index: city-data 9 | api: cities 10 | unique: ['name'] 11 | options: 12 | search: dictionary_only 13 | 14 | dictionary: 15 | id: 16 | source: GEOID 17 | description: > 18 | Geographic Identifier - fully concatenated geographic code (State FIPS and 19 | County FIPS). The Census Bureau and other state and federal agencies are 20 | responsible for assigning geographic identifiers, or GEOIDs, to geographic 21 | entities to facilitate the organization, presentation, and 22 | exchange of geographic and statistical data. GEOIDs are numeric codes that 23 | uniquely identify all administrative/legal and statistical geographic areas for 24 | which the Census Bureau tabulates data. From Alaska, the largest state, 25 | to the smallest census block in New York City, every geographic area 26 | has a unique GEOID. Data users rely on GEOIDs to join the appropriate 27 | demographic data from censuses and surveys, such as the 28 | American Community Survey (ACS), to various levels of geography for data 29 | analysis, interpretation and mapping. 30 | code: 31 | source: ANSICODE 32 | description: > 33 | American National Standards Institute codes (ANSI codes) 34 | are standardized numeric or alphabetic codes issued by the American 35 | National Standards Institute (ANSI) to ensure uniform identification of 36 | geographic entities through all federal government agencies. 37 | name: 38 | source: NAME 39 | description: The name of the city 40 | type: literal 41 | state: 42 | source: USPS 43 | description: Two letter state abbreviation 44 | population: 45 | source: POP10 46 | description: City population from 2010 Census data 47 | type: integer 48 | location.lat: INTPTLAT 49 | location.lon: INTPTLONG 50 | area.land: 51 | description: Land Area (square miles) 52 | source: ALAND_SQMI 53 | type: float 54 | area.water: 55 | description: Water Area (square miles) 56 | source: AWATER_SQMI 57 | type: float 58 | 59 | categories: 60 | general: 61 | title: General 62 | description: > 63 | general information about the city, including standard 64 | identifiers and actual census summary data about the population of the city. 65 | fields: [id, code, name, state, population] 66 | geographic: 67 | title: Geographic 68 | description: > 69 | Geographic characteristics of the area.
These are created for 70 | statistical purposes only. Depiction and designation for statistical 71 | purposes does not constitute a determination of jurisdictional authority 72 | or rights of ownership or entitlement. 73 | fields: [location, area.land, area.water] 74 | 75 | files: 76 | - name: cities100.csv 77 | -------------------------------------------------------------------------------- /spec/fixtures/school_names/data.yaml: -------------------------------------------------------------------------------- 1 | version: 0 2 | index: name-data 3 | api: names 4 | dictionary: 5 | id: ID 6 | school.name: 7 | source: NAME 8 | type: autocomplete 9 | school.state: STATE 10 | 11 | files: 12 | - name: school_names.csv 13 | -------------------------------------------------------------------------------- /spec/fixtures/school_names/school_names.csv: -------------------------------------------------------------------------------- 1 | ID,STATE,NAME 2 | 1,AL,Stillman College 3 | 2,NY,New York University 4 | 3,AZ,Arizona State University 5 | 4,CA,University of California-Berkeley 6 | 5,MA,Berklee College of Music 7 | 6,NY,Berk Trade and Business School 8 | 7,AZ,University of Phoenix-Online Campus 9 | 8,AZ,University of Phoenix-Phoenix Campus 10 | 9,AZ,Phoenix College 11 | -------------------------------------------------------------------------------- /spec/fixtures/schools/data.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: Aug6-2015-08-10-23:48-0600 3 | api: fakeschool 4 | index: fakeschool-data 5 | unique: 6 | - id 7 | options: 8 | limit_files: 1 9 | limit_rows: 100 10 | 11 | dictionary: 12 | id: 13 | source: UNITID 14 | type: integer 15 | description: Unit ID for institution 16 | school.name: 17 | source: INSTNM 18 | description: Institution name 19 | school.city: 20 | source: CITY_MAIN 21 | description: City 22 | school.state: 23 | source: STABBR_MAIN 24 | description: State postcode 25 | school.zip: 26 | source: ZIP_MAIN 27 | type: integer 28 | description: ZIP code 29 | completion.rate.lt_four_year: 30 | source: C150_L4_POOLED_SUPP 31 | type: float 32 | description: 150% completion rate for less-than-four-year institutions, pooled in two-year rolling averages and suppressed for small n size 33 | completion.rate.four_year: 34 | source: C150_4_POOLED_SUPP 35 | type: float 36 | description: 150% completion rate for four-year institutions, pooled in two-year rolling averages and suppressed for small n size 37 | completion.rate.overall: 38 | calculate: C150_L4_POOLED_SUPP or C150_4_POOLED_SUPP 39 | type: float 40 | description: 150% completion rate for the institution, independent of degree 41 | 42 | files: 43 | - name: schools.csv 44 | -------------------------------------------------------------------------------- /spec/fixtures/schools/schools.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,PREDDEG,UGDS,MENONLY,WOMENONLY,C150_4_POOLED_SUPP,C150_L4_POOLED_SUPP,earn_2002_p10,gt_25k_2006_p6 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Indigo Peak School,639,1,183504,0,0,NULL,0.16,3800,0.61 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Warm Thread Beauty College,1218,3,210739,0,0,0.62,NULL,13566,0.10 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Arrogant Abyss University,613,1,116967,0,0,NULL,0,1177,0.84 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Greasy Marsh 
Institute,590,1,81254,0,1,NULL,NULL,54146,0.49 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Harber Institute of Technology,1355,1,256538,1,0,0,0.91,38553,0.32 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Unsightly Mountain School of Fine Art,1201,1,139899,0,0,NULL,0.87,55899,0.95 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Auburn University College,740,3,165974,0,0,0.21,NULL,51608,0.73 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Birmingham School,1084,1,224554,0,0,NULL,0.70,29545,0.67 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Conn Institute of Technology,1171,4,87710,0,0,NULL,0.56,58307,0.63 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Hollow Resonance Institute,1058,2,97265,0,0,NULL,0.59,17880,0.36 12 | -------------------------------------------------------------------------------- /spec/fixtures/types/data.yaml: -------------------------------------------------------------------------------- 1 | 2 | version: 0 3 | # cities100.txt 4 | # National Places Gazetteer Files, from US Census 2010 5 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 6 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t') > result.txt 7 | # head -n 101 result.txt > cities100.txt 8 | # then converted to csv and removed " city" from after each city name 9 | dictionary: 10 | id: 11 | source: id 12 | type: literal 13 | city.name: 14 | source: name 15 | type: name 16 | city.state: state 17 | location.lat: lat 18 | location.lon: lon 19 | 20 | index: place-data 21 | api: places 22 | files: 23 | - name: places.csv 24 | -------------------------------------------------------------------------------- /spec/fixtures/types/places.csv: -------------------------------------------------------------------------------- 1 | id,state,name,lat,lon 2 | ca sf,CA,San Francisco,37.727239,-123.032229 3 | ny ny,NY,New York,40.664274,-73.938500 4 | la no,LA,New Orleans,30.068636,-89.939007 5 | -------------------------------------------------------------------------------- /spec/lib/data_magic/calculated_columns_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "calculated columns" do 5 | 6 | before :example do 7 | DataMagic.destroy 8 | ENV['DATA_PATH'] = data_path 9 | end 10 | after :example do 11 | DataMagic.destroy 12 | end 13 | 14 | describe "combine into float" do 15 | let(:data_path) { "./spec/fixtures/schools" } 16 | it "can combine two columns" do 17 | DataMagic.config = DataMagic::Config.new 18 | DataMagic.import_with_dictionary 19 | result = DataMagic.search({}, fields: ['id', 'completion.rate.overall']) 20 | results = result['results'].sort_by { |hash| hash['id'] } 21 | expect(results[0]).to eq('id' => 1, 'completion.rate.overall' => 0.16) 22 | expect(results[1]).to eq('id' => 2, 'completion.rate.overall' => 0.62) 23 | expect(results[2]).to eq('id' => 3, 'completion.rate.overall' => nil) 24 | expect(results[3]).to eq('id' => 4, 'completion.rate.overall' => nil) 25 | expect(results[4]).to eq('id' => 5, 'completion.rate.overall' => 0.91) 26 | end 27 | end 28 | 29 | describe "combine into boolean" do 30 | let(:data_path) { "./spec/fixtures/calculated_columns" } 31 | it "can combine multiple columns" do 32 | DataMagic.config = DataMagic::Config.new 33 | DataMagic.import_with_dictionary 34 | result = DataMagic.search({}, fields: %w(id summarybool)) 35 | results = result['results'].sort_by { |hash| hash['id'] } 36 | expect(results[0]).to eq('id' =>
1, 'summarybool' => true) 37 | expect(results[1]).to eq('id' => 2, 'summarybool' => false) 38 | expect(results[2]).to eq('id' => 3, 'summarybool' => true) 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /spec/lib/data_magic/config_field_types_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'DataMagic::Config #field_types' do 4 | let(:config) { DataMagic::Config.new(load_datayaml: false) } 5 | 6 | it "returns empty if dictionary is empty" do 7 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 8 | allow(config).to receive(:dictionary).and_return({}) 9 | expect(config.field_types).to eq({}) 10 | end 11 | 12 | context "when no type is given" do 13 | before do 14 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 15 | allow(config).to receive(:dictionary).and_return({ 16 | 'name' => {source:'NAME_COLUMN'} 17 | }) 18 | end 19 | 20 | it "defaults to string" do 21 | expect(config.field_types).to eq({ 22 | 'name' => 'string' 23 | }) 24 | end 25 | end 26 | 27 | it "supports integers" do 28 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 29 | allow(config).to receive(:dictionary).and_return( 30 | IndifferentHash.new count: 31 | {source:'COUNT_COLUMN', type: 'integer'} 32 | ) 33 | expect(config.field_types).to eq({'count' => 'integer'}) 34 | end 35 | 36 | context "with float type" do 37 | it "sets float mapping" do 38 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 39 | allow(config).to receive(:dictionary).and_return( 40 | IndifferentHash.new percent: 41 | {source:'PERCENT_COLUMN', type: 'float'} 42 | ) 43 | expect(config.field_types).to eq({'percent' => 'float'}) 44 | end 45 | 46 | it "can be excluded" do 47 | allow(config).to receive(:dictionary).and_return( 48 | IndifferentHash.new id: {source:'ID', type: 'integer'}, 49 | percent: {source:'PERCENT', type: 'float'} 50 | ) 51 | allow(config).to receive(:file_config).and_return([ 52 | IndifferentHash.new({ name:'one.csv', only: ['id'] }) 53 | ]) 54 | expect(config.field_types).to eq({'id' => 'integer'}) 55 | end 56 | 57 | it "can be nested" do 58 | allow(config).to receive(:dictionary).and_return( 59 | IndifferentHash.new id: {source:'ID', type: 'integer'}, 60 | percent: {source:'PERCENT', type: 'float'} 61 | ) 62 | allow(config).to receive(:file_config).and_return([ 63 | IndifferentHash.new({name:'one.csv', 64 | only: ['id']}), 65 | IndifferentHash.new({name:'two.csv', 66 | nest: {key: '2012', contents: ['percent']}}) 67 | ]) 68 | expect(config.field_types).to eq({ 69 | 'id' => 'integer', 70 | '2012.percent' => 'float' 71 | }) 72 | end 73 | end 74 | 75 | it "supports location.lat and location.lon fields" do 76 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 77 | allow(config).to receive(:dictionary).and_return( 78 | IndifferentHash.new 'location.lat': {source:'LAT_COLUMN', type: 'float'}, 79 | 'location.lon': {source:'LON_COLUMN', type: 'float'} 80 | ) 81 | expect(config.field_types).to eq( 82 | { 83 | 'location.lat'=>'float', 84 | 'location.lon'=>'float' 85 | } 86 | ) 87 | end 88 | end 89 | -------------------------------------------------------------------------------- /spec/lib/data_magic/config_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe DataMagic::Config do 4 | 
before(:all) do 5 | ENV['DATA_PATH'] = './spec/fixtures/import_with_dictionary' 6 | end 7 | 8 | it "detects data.yml files" do 9 | ENV['DATA_PATH'] = './spec/fixtures/cities_with_yml' 10 | config = DataMagic::Config.new 11 | expect(config.data["api"]).to eq("cities") 12 | end 13 | 14 | describe 'slugification' do 15 | it 'slugifies local paths' do 16 | config = DataMagic::Config.new 17 | slugified = config.clean_index('path/to/my_directory') 18 | expect(slugified).to eq('my-directory') 19 | end 20 | 21 | it 'slugifies s3 bucket names' do 22 | config = DataMagic::Config.new 23 | slugified = config.clean_index('s3://user:pass@my_bucket') 24 | expect(slugified).to eq('my-bucket') 25 | end 26 | end 27 | 28 | context "s3" do 29 | it "detects data.yaml" do 30 | ENV['DATA_PATH'] = 's3://mybucket' 31 | fake_s3 = class_spy("Fake Aws::S3::Client") 32 | fake_get_object_response = double( 33 | "S3 response", 34 | body: StringIO.new({ 'index' => 'fake-index' }.to_yaml), 35 | isOK: true, 36 | status: 200 37 | ) 38 | allow(fake_s3).to receive(:get_object) 39 | .with(bucket: 'mybucket', key: 'data.yaml', response_target: duck_type(:read)) 40 | .and_return(fake_get_object_response) 41 | config = DataMagic::Config.new(s3: fake_s3) 42 | expect(config.s3).to eq(fake_s3) 43 | expect(config.data["index"]).to eq("fake-index") 44 | end 45 | 46 | it "raises error if s3 errors" do 47 | ENV['DATA_PATH'] = 's3://mybucket' 48 | fake_s3 = class_spy("Fake Aws::S3::Client") 49 | 50 | allow(fake_s3).to receive(:get_object) 51 | .with(bucket: 'mybucket', key: 'data.yaml', response_target: duck_type(:read)) 52 | .and_raise(RuntimeError) 53 | expect { 54 | DataMagic::Config.new(s3: fake_s3) 55 | }.to raise_error(RuntimeError) 56 | end 57 | 58 | end 59 | 60 | context "create" do 61 | it "works with zero args" do 62 | expect(DataMagic::Config.new).to_not be_nil 63 | end 64 | it "can set s3 client" do 65 | # TODO: mock s3 66 | s3_client = "s3 client" 67 | config = DataMagic::Config.new(s3: s3_client) 68 | expect(config.s3).to eq(s3_client) 69 | end 70 | end 71 | 72 | context "when loaded" do 73 | let(:config) { DataMagic::Config.new } 74 | 75 | after do 76 | config.clear_all 77 | end 78 | 79 | context "#scoped_index_name" do 80 | it "includes environment prefix" do 81 | expect(config.scoped_index_name).to eq('test-city-data') 82 | end 83 | end 84 | 85 | it "has config data" do 86 | default_config = { 87 | "version" => "cities100-2010", 88 | "index" => "city-data", "api" => "cities", 89 | "files" => [{ "name" => "cities100.csv" }], 90 | "options" => {:search=>"dictionary_only"}, 91 | "unique" => ["name"], 92 | "data_path" => "./sample-data" 93 | } 94 | expect(config.data.keys).to include('dictionary') 95 | dictionary = config.data.delete 'dictionary' 96 | 97 | expect(dictionary.keys.sort).to eq %w(id code name state population 98 | location.lat location.lon land_area area.water).sort 99 | categories = config.data.delete 'categories' 100 | expect(categories.keys.sort).to eq %w(general general2 general3 general4 general5 geographic).sort 101 | expect(config.data).to eq(default_config) 102 | end 103 | 104 | it "has default page size" do 105 | expect(DataMagic::DEFAULT_PAGE_SIZE).to_not be_nil 106 | expect(config.page_size).to eq(DataMagic::DEFAULT_PAGE_SIZE) 107 | end 108 | 109 | describe "#update_indexed_config" do # rename ...
or do this in load_config or something 110 | context "after loading config" do 111 | let(:fixture_path) { "./spec/fixtures/import_with_dictionary" } 112 | before do 113 | config.load_datayaml(fixture_path) 114 | end 115 | it "should be true" do 116 | expect(config.update_indexed_config).to be true 117 | end 118 | it "should set new data_path" do 119 | expect(config.data_path).to eq(fixture_path) 120 | end 121 | 122 | it "twice should be false" do 123 | config.update_indexed_config 124 | expect(config.update_indexed_config).to be false 125 | end 126 | end 127 | end 128 | 129 | describe "when has a custom null_value" do 130 | it 'should have a default null value' do 131 | expect(config.null_value).to eq('NULL') 132 | end 133 | 134 | it 'should set null value field' do 135 | config.load_datayaml("./spec/fixtures/import_with_null_value") 136 | expect(config.null_value).to eq('abc123') 137 | end 138 | end 139 | end 140 | 141 | context ".calculated_field_list" do 142 | let(:config) { DataMagic::Config.new(load_datayaml: false) } 143 | it "finds fields with 'calculate' property" do 144 | allow(config).to receive(:dictionary).and_return( 145 | { 146 | one: { 147 | source: 'column1', 148 | type: 'float' 149 | }, 150 | two: { 151 | source: 'column2', 152 | type: 'float' 153 | }, 154 | all: { 155 | calculate: 'column1 or column2', 156 | type: 'float', 157 | description: 'something' 158 | } 159 | } 160 | ) 161 | expect(config.calculated_field_list).to eq(['all']) 162 | end 163 | end 164 | 165 | context ".only_field_list" do 166 | let(:config) { DataMagic::Config.new(load_datayaml: false) } 167 | let(:simple_fields) do 168 | { 'one' => 'column1', 'two' => 'column2', 'three' => 'column3' } 169 | end 170 | let(:fields_with_dots) do 171 | { 'one' => 'column1', 'two.a' => 'column2a', 'two.b' => 'column2b' } 172 | end 173 | 174 | it "selects a subset" do 175 | expect(config.only_field_list(%w(one two), simple_fields)).to eq( 176 | 'one' => 'column1', 'two' => 'column2' 177 | ) 178 | end 179 | 180 | it "selects fields with dots" do 181 | expect(config.only_field_list(%w(two), fields_with_dots)).to eq( 182 | 'two.a' => 'column2a', 'two.b' => 'column2b' 183 | ) 184 | end 185 | end 186 | end 187 | -------------------------------------------------------------------------------- /spec/lib/data_magic/create_index_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic #init" do 5 | before (:all) do 6 | ENV['DATA_PATH'] = './spec/fixtures/import_with_dictionary' 7 | end 8 | 9 | after(:each) do 10 | DataMagic.destroy 11 | end 12 | 13 | context "with no options" do 14 | it "creates index only once" do 15 | expect(DataMagic).to receive(:create_index).once 16 | DataMagic.init 17 | end 18 | 19 | it "creates index" do 20 | DataMagic.init 21 | expect(DataMagic.config.index_exists?).to be true 22 | end 23 | 24 | it "does not re-create index with subsequent call to #import_with_dictionary" do 25 | expect(DataMagic).to receive(:create_index).once 26 | DataMagic.init 27 | DataMagic.import_with_dictionary 28 | end 29 | end 30 | 31 | 32 | context "with load_now: false" do 33 | it "does not call #create_index" do 34 | expect(DataMagic).not_to receive(:create_index) 35 | DataMagic.init(load_now: false) 36 | end 37 | 38 | it "does not create index" do 39 | DataMagic.init(load_now: false) 40 | expect(DataMagic.config.index_exists?).to be false 41 | end 42 | 43 | it "creates index with subsequent call to 
#import_with_dictionary" do 44 | DataMagic.init(load_now: false) 45 | DataMagic.import_with_dictionary 46 | expect(DataMagic.config.index_exists?).to be true 47 | end 48 | 49 | it "creates index with subsequent call to #import_csv" do 50 | ENV['DATA_PATH'] = './spec/fixtures/minimal' 51 | DataMagic.init(load_now: false) 52 | data_str = <<-eos 53 | a,b 54 | 1,2 55 | 3,4 56 | eos 57 | data = StringIO.new(data_str) 58 | DataMagic.import_csv(data) 59 | expect(DataMagic.config.index_exists?).to be true 60 | end 61 | end 62 | end -------------------------------------------------------------------------------- /spec/lib/data_magic/example_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe Example do 4 | let(:hash) do 5 | { name: 'foo', 6 | description: 'interesting thing', 7 | params: 'a=1&b=something', 8 | endpoint: 'api' } 9 | end 10 | subject(:e) { Example.new(hash) } 11 | 12 | it "has a name" do 13 | expect(e.name).to eq(hash[:name]) 14 | end 15 | it "has a description" do 16 | expect(e.description).to eq(hash[:description]) 17 | end 18 | it "has params" do 19 | expect(e.params).to eq(hash[:params]) 20 | end 21 | it "has an endpoint" do 22 | expect(e.endpoint).to eq(hash[:endpoint]) 23 | end 24 | 25 | it "has a link" do 26 | expect(e.link).to eq("/v1/#{e.endpoint}?#{e.params}") 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /spec/lib/data_magic/import_csv_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic #import_csv" do 5 | before do 6 | ENV['DATA_PATH'] = './spec/fixtures/minimal' 7 | DataMagic.init(load_now: false) 8 | end 9 | after do 10 | DataMagic.destroy 11 | #expect(DataMagic.client.indices.get(index: '_all')).to be_empty 12 | end 13 | 14 | it "throws errors for bad format" do 15 | data = StringIO.new("not csv format") 16 | expect{DataMagic.import_csv(data)}.to raise_error(DataMagic::InvalidData) 17 | end 18 | 19 | it "reads file and reports number of rows and headers" do 20 | data_str = <<-eos 21 | a,b 22 | 1,2 23 | 3,4 24 | eos 25 | data = StringIO.new(data_str) 26 | num_rows, fields = DataMagic.import_csv(data) 27 | expect(num_rows).to be(2) 28 | expect(fields).to eq(['a', 'b']) 29 | end 30 | 31 | end 32 | -------------------------------------------------------------------------------- /spec/lib/data_magic/import_with_delta_file_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "delta update" do 5 | 6 | before :example do 7 | DataMagic.destroy 8 | ENV['DATA_PATH'] = './spec/fixtures/nested_delta_files' 9 | DataMagic.config = DataMagic::Config.new 10 | DataMagic.import_with_dictionary 11 | DataMagic.import_with_delta({delta_original: 'latest-school-data.csv', delta_update: 'latest-school-data_update1.csv'}) 12 | end 13 | after :example do 14 | DataMagic.destroy 15 | end 16 | let(:query) { {} } 17 | let(:sort) { nil } 18 | let(:result) { DataMagic.search(query, sort: sort) } 19 | let(:first) { result['results'].first } 20 | let(:id_one) { result['results'].find { |item| item['id'] == '1' } } 21 | let(:total) { result['metadata']['total'] } 22 | 23 | it "updates one document per unique id" do 24 | expect(total).to eq(11) 25 | end 26 | 27 | it "updates root document :delta_only fields" do 28 | expect(id_one['id']).to eq('1')
29 | expect(id_one['under_investigation']).to eq(1) 30 | end 31 | 32 | it "does not update root document fields not specified in :delta_only" do 33 | expect(id_one['name']).to eq('Reichert University') 34 | end 35 | 36 | it "updates nested documents per unique id" do 37 | expect(id_one['latest']).to_not be_nil 38 | expect(id_one['latest']['earnings']['6_yrs_after_entry']['median']).to eq(30000) 39 | end 40 | 41 | it "does not update nested documents in non-delta files" do 42 | expect(id_one['id']).to eq('1') 43 | expect(id_one['2013']).to_not be_nil 44 | expect(id_one['2013']['earnings']['6_yrs_after_entry']['median']).to eq(26318) 45 | end 46 | 47 | context "can import a subset of fields" do 48 | context "and when searching for a field value" do 49 | let(:query) { {zipcode: "35762"} } 50 | it "and doesn't find column" do 51 | expect(total).to eq(0) 52 | end 53 | end 54 | it "and doesn't include extra field" do 55 | expect(first['zipcode']).to be(nil) 56 | end 57 | end 58 | 59 | context "when searching on a nested field" do 60 | let(:query) { { 'latest.earnings.6_yrs_after_entry.median' => 30000 } } 61 | it "can find the correct results" do 62 | expect(total).to eq(1) 63 | expect(first['latest']['earnings']['6_yrs_after_entry']).to eq({"percent_gt_25k"=>0.53, "median"=>30000}) 64 | end 65 | end 66 | 67 | context "when sorting by a nested field" do 68 | let(:sort) { 'latest.earnings.6_yrs_after_entry.median' } 69 | it "can find the right first result" do 70 | expect(total).to eq(11) 71 | expect(first['latest']['earnings']['6_yrs_after_entry']).to eq({"percent_gt_25k"=>0.1, "median"=>1900}) 72 | end 73 | end 74 | end 75 | 76 | 77 | -------------------------------------------------------------------------------- /spec/lib/data_magic/import_with_nested_files_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "unique key(s)" do 5 | 6 | before :example do 7 | DataMagic.destroy 8 | ENV['DATA_PATH'] = './spec/fixtures/nested_files' 9 | DataMagic.config = DataMagic::Config.new 10 | DataMagic.import_with_dictionary 11 | end 12 | after :example do 13 | DataMagic.destroy 14 | end 15 | let(:query) { {} } 16 | let(:sort) { nil } 17 | let(:result) { DataMagic.search(query, sort: sort) } 18 | let(:first) { result['results'].first } 19 | let(:id_one) { result['results'].find { |item| item['id'] == '1' } } 20 | let(:total) { result['metadata']['total'] } 21 | 22 | it "creates one document per unique id" do 23 | expect(total).to eq(11) 24 | end 25 | 26 | it "nests documents per unique id" do 27 | expect(id_one['id']).to eq('1') 28 | expect(id_one['2013']).to_not be_nil 29 | end 30 | 31 | it "root document contains special 'only' fields" do 32 | expect(id_one['id']).to eq('1') 33 | expect(id_one['name']).to eq('Reichert University') 34 | expect(id_one['city']).to eq('Normal') 35 | expect(id_one['state']).to eq('AL') 36 | end 37 | 38 | context "can import a subset of fields" do 39 | context "and when searching for a field value" do 40 | let(:query) { {zipcode: "35762"} } 41 | it "and doesn't find column" do 42 | expect(total).to eq(0) 43 | end 44 | end 45 | it "and doesn't include extra field" do 46 | expect(first['zipcode']).to be(nil) 47 | end 48 | end 49 | 50 | context "when searching on a nested field" do 51 | let(:query) { { '2013.earnings.6_yrs_after_entry.median' => 26318 } } 52 | it "can find the correct results" do 53 | expect(total).to eq(1) 54 | 
expect(first['2013']['earnings']['6_yrs_after_entry']).to eq({"percent_gt_25k"=>0.53, "median"=>26318}) 55 | end 56 | end 57 | 58 | context "when sorting by a nested field" do 59 | let(:sort) { '2013.earnings.6_yrs_after_entry.median' } 60 | it "can find the right first result" do 61 | expect(total).to eq(11) 62 | expect(first['2013']['earnings']['6_yrs_after_entry']).to eq({"percent_gt_25k"=>0.09, "median"=>1836}) 63 | end 64 | end 65 | end 66 | -------------------------------------------------------------------------------- /spec/lib/data_magic/import_without_data_yaml_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic #import_without_data_yaml" do 5 | describe "without ALLOW_MISSING_YML" do 6 | it "not found locally raises error" do 7 | ENV['DATA_PATH'] = './spec/fixtures/cities_without_yml' 8 | expect { 9 | DataMagic.init(load_now: true) 10 | }.to raise_error(IOError, "No data.y?ml found at ./spec/fixtures/cities_without_yml. Did you mean to define ALLOW_MISSING_YML environment variable?") 11 | end 12 | it "not found on s3 raises error" do 13 | ENV['DATA_PATH'] = 's3://mybucket' 14 | fake_s3 = Aws::S3::Client.new(stub_responses: true) 15 | fake_s3.stub_responses(:get_object, Aws::S3::Errors::NoSuchKey.new(Seahorse::Client::RequestContext, 'Fake Error')) 16 | expect { 17 | config = DataMagic::Config.new(s3: fake_s3) 18 | }.to raise_error(IOError, "No data.y?ml found at s3://mybucket. Did you mean to define ALLOW_MISSING_YML environment variable?") 19 | end 20 | 21 | end 22 | describe "with ALLOW_MISSING_YML" do 23 | let (:expected) do 24 | { 25 | "metadata" => { 26 | "total" => 1, 27 | "page" => 0, 28 | "per_page" => DataMagic::DEFAULT_PAGE_SIZE 29 | }, 30 | "results" => [] 31 | } 32 | end 33 | 34 | before(:all) do 35 | DataMagic.destroy 36 | ENV['ALLOW_MISSING_YML'] = 'allow' 37 | ENV['DATA_PATH'] = './spec/fixtures/cities_without_yml' 38 | DataMagic.init(load_now: true) 39 | end 40 | after(:all) do 41 | DataMagic.destroy 42 | ENV['ALLOW_MISSING_YML'] = '' 43 | end 44 | 45 | it "can get list of imported csv files" do 46 | file_list = [ 47 | "./spec/fixtures/cities_without_yml/cities50.csv", 48 | "./spec/fixtures/cities_without_yml/cities51-100.csv", 49 | "./spec/fixtures/cities_without_yml/more.csv", 50 | ] 51 | expect(DataMagic.config.files.sort).to eq(file_list) 52 | end 53 | 54 | it "can get index name from api endpoint" do 55 | expect(DataMagic.config.find_index_for('cities-without-yml')).to eq('cities-without-yml') 56 | end 57 | 58 | it "indexes files with yaml mapping" do 59 | result = DataMagic.search({NAME: "Chicago"}, api: 'cities-without-yml') 60 | expected["results"] = [ 61 | { 62 | "USPS"=>"IL", 63 | "GEOID"=>"1714000", 64 | "ANSICODE"=>"00428803", 65 | "NAME"=>"Chicago", 66 | "LSAD"=>"25", 67 | "FUNCSTAT"=>"A", 68 | "POP10"=>"2695598", 69 | "HU10"=>"1194337", 70 | "ALAND"=>"589571105", 71 | "AWATER"=>"16781658", 72 | "ALAND_SQMI"=>"227.635", 73 | "AWATER_SQMI"=>"6.479", 74 | "INTPTLAT"=>"41.837551", 75 | "INTPTLONG"=>"-87.681844", 76 | } 77 | ] 78 | expect(result).to eq(expected) 79 | end 80 | end 81 | end 82 | -------------------------------------------------------------------------------- /spec/lib/data_magic/index/document_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe DataMagic::Index::Document do 5 | before do 6 | allow(DataMagic).to 
receive(:config).and_return(config) 7 | end 8 | 9 | let(:document) { DataMagic::Index::Document.new(data) } 10 | let(:config) { DataMagic::Config.new() } 11 | let(:data) { {} } 12 | 13 | context 'when configured without any unique keys' do 14 | before do 15 | config.data['unique'] = [] 16 | end 17 | 18 | it 'id should be nil' do 19 | expect(document.id).to be(nil) 20 | end 21 | 22 | it 'id should not be empty though' do 23 | expect(document.id_empty?).to be_falsey 24 | end 25 | end 26 | 27 | context 'when configured with the default keys' do 28 | context 'and there is no data' do 29 | it 'id should be an empty string' do 30 | expect(document.id).to eq('') 31 | end 32 | 33 | it 'id should be considered empty' do 34 | expect(document.id_empty?).to be_truthy 35 | end 36 | end 37 | 38 | context 'when there is data' do 39 | let(:data) { 40 | {"name" => "foo", "state"=>"MA"} 41 | } 42 | 43 | it 'id should be the value for the name key' do 44 | expect(document.id).to eq('foo') 45 | end 46 | 47 | it 'id should not be considered empty' do 48 | expect(document.id_empty?).to be_falsey 49 | end 50 | end 51 | end 52 | 53 | context 'with custom id configuration' do 54 | let(:data) { 55 | {"name" => "foo", "state"=>"MA"} 56 | } 57 | 58 | before do 59 | config.data['unique'] = ['name', 'state'] 60 | end 61 | 62 | it 'id should build the right id for the data' do 63 | expect(document.id).to eq('foo:MA') 64 | end 65 | 66 | it 'id should not be considered empty' do 67 | expect(document.id_empty?).to be_falsey 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /spec/lib/data_magic/index/event_logger_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe DataMagic::Index::EventLogger do 5 | let(:event_logger) { 6 | l = DataMagic::Index::EventLogger.new 7 | allow(l).to receive(:logger).and_return(logger) 8 | l 9 | } 10 | 11 | let(:logger) { double('logger') } 12 | 13 | context 'when triggering an event with only a message argument' do 14 | it 'logs the message with the right level' do 15 | expect(logger).to receive(:info).with('hey!') 16 | event_logger.trigger('info', 'hey!') 17 | 18 | expect(logger).to receive(:debug).with('what happened?') 19 | event_logger.trigger('debug', 'what happened?') 20 | 21 | expect(logger).to receive(:warn).with('dude? everything ok?') 22 | event_logger.trigger('warn', 'dude? 
everything ok?') 23 | 24 | expect(logger).to receive(:error).with('FIRE IN THE HOLE!') 25 | event_logger.trigger('error', 'FIRE IN THE HOLE!') 26 | end 27 | end 28 | 29 | context 'when triggering an event with a message and an object' do 30 | it 'logs as a key value pair with an inspection of the object' do 31 | expect(logger).to receive(:info).with("foo: {:wild=>\"bar\"}") 32 | event_logger.trigger('info', 'foo', {wild: 'bar'}) 33 | end 34 | 35 | it 'will shorten the object inspection when provided a limit' do 36 | expect(logger).to receive(:warn).with("foo: {:wild") 37 | event_logger.trigger('warn', 'foo', {wild: 'bar'}, 5) 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /spec/lib/data_magic/index/importer_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic::Index::Importer" do 5 | before do 6 | ENV['DATA_PATH'] = './spec/fixtures/minimal' 7 | DataMagic.init(load_now: false) 8 | end 9 | after do 10 | DataMagic.destroy 11 | end 12 | 13 | it "indexes in parallel based on NPROCS" do 14 | stub_const('ENV', { 'NPROCS' => '2' }) 15 | 16 | data_str = <<-eos 17 | a,b 18 | 1,2 19 | 3,4 20 | eos 21 | data = StringIO.new(data_str) 22 | num_rows, fields = DataMagic.import_csv(data) 23 | expect(num_rows).to be(2) 24 | expect(fields).to eq(['a', 'b']) 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /spec/lib/data_magic/index/repository_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe DataMagic::Index::Repository do 5 | let(:repository) { DataMagic::Index::Repository.new(super_client, document) } 6 | 7 | let(:super_client) { double('super client', {index_name: 'index', nested_partial?: false}) } 8 | let(:document) { double('document', {id: 'id', data: 'data'}) } 9 | 10 | context 'when super client is creating' do 11 | before do 12 | allow(super_client).to receive(:creating?).and_return(true) 13 | allow(super_client).to receive(:index) 14 | end 15 | 16 | it '#save creates an index' do 17 | expect(super_client).to receive(:index).with({ 18 | index: 'index', 19 | id: 'id', 20 | type: 'document', 21 | body: 'data', 22 | timeout: '5m' 23 | }) 24 | repository.save 25 | end 26 | 27 | it '#save will not be skipped when successful' do 28 | repository.save 29 | expect(repository.skipped?).to be_falsey 30 | end 31 | end 32 | 33 | context 'when super client is not creating' do 34 | before do 35 | allow(super_client).to receive(:creating?).and_return(false) 36 | allow(super_client).to receive(:allow_skips?) 
37 | allow(super_client).to receive(:update) 38 | end 39 | 40 | it '#save updates an index' do 41 | expect(super_client).to receive(:update).with({ 42 | index: 'index', 43 | id: 'id', 44 | type: 'document', 45 | body: {doc: 'data'}, 46 | timeout: '5m' 47 | }) 48 | repository.save 49 | end 50 | 51 | it '#save will not be skipped when successful' do 52 | repository.save 53 | expect(repository.skipped?).to be_falsey 54 | end 55 | end 56 | 57 | context 'when super client is not creating, not skipping and an error is raised' do 58 | before do 59 | allow(super_client).to receive(:creating?).and_return(false) 60 | allow(super_client).to receive(:allow_skips?).and_return(false) 61 | end 62 | 63 | it '#save raises an error' do 64 | allow(super_client).to receive(:update).and_raise(Elasticsearch::Transport::Transport::Errors::NotFound) 65 | expect { 66 | repository.save 67 | }.to raise_error(Elasticsearch::Transport::Transport::Errors::NotFound) 68 | end 69 | end 70 | 71 | context 'when super client is not creating, skipping and an error is raised' do 72 | before do 73 | allow(super_client).to receive(:creating?).and_return(false) 74 | allow(super_client).to receive(:allow_skips?).and_return(true) 75 | end 76 | 77 | it '#save marks the repository as skipped' do 78 | allow(super_client).to receive(:update).and_raise(Elasticsearch::Transport::Transport::Errors::NotFound) 79 | expect { 80 | repository.save 81 | }.not_to raise_error 82 | expect(repository.skipped?).to eq(true) 83 | end 84 | end 85 | end 86 | -------------------------------------------------------------------------------- /spec/lib/data_magic/name_type_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic name types" do 5 | 6 | before :example do 7 | DataMagic.destroy 8 | ENV['DATA_PATH'] = './spec/fixtures/types' 9 | DataMagic.init(load_now: true) 10 | end 11 | after :example do 12 | DataMagic.destroy 13 | end 14 | 15 | it "can search for one word" do 16 | response = DataMagic.search({'city.name' => 'New'}, fields:['city.name']) 17 | results = response['results'].sort {|a,b| a['city.name'] <=> b['city.name']} 18 | expect(results).to eq( 19 | [{"city.name"=>"New Orleans"}, {"city.name"=>"New York"}]) 20 | end 21 | 22 | it "can search for multiple words" do 23 | response = DataMagic.search({'city.name' => 'New York'}, fields:['city.name']) 24 | results = response['results'] 25 | expect(results).to eq( 26 | [{"city.name"=>"New York"}]) 27 | end 28 | 29 | it "can search for partial words" do 30 | response = DataMagic.search({'city.name' => 'S Fran'}, fields:['city.name']) 31 | results = response['results'] 32 | expect(results).to eq( 33 | [{"city.name"=>"San Francisco"}]) 34 | end 35 | 36 | it "is not case sensitive" do 37 | response = DataMagic.search({'city.name' => 'nEW'}, fields:['city.name']) 38 | results = response['results'].sort {|a,b| a['city.name'] <=> b['city.name']} 39 | expect(results).to eq( 40 | [{"city.name"=>"New Orleans"}, {"city.name"=>"New York"}]) 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /spec/lib/data_magic/nested_and_non_nested_combos_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | require 'hashie' 4 | 5 | describe DataMagic::QueryBuilder do 6 | 7 | before :example do 8 | DataMagic.destroy 9 | DataMagic.client 10 | ENV['DATA_PATH'] = 
'./spec/fixtures/nested_data_type' 11 | DataMagic.config = DataMagic::Config.new 12 | end 13 | 14 | after :example do 15 | DataMagic.destroy 16 | end 17 | 18 | RSpec.configure do |c| 19 | c.alias_it_should_behave_like_to :it_correctly, 'correctly:' 20 | end 21 | 22 | let(:nested_meta) { { post_es_response: {}, from: 0, size: 20, _source: false } } 23 | let(:options) { {} } 24 | let(:query_hash) { DataMagic::QueryBuilder.from_params(subject, options, DataMagic.config) } 25 | let(:non_nested_range) {{ 26 | or: [{ 27 | range: { 28 | "school.degrees_awarded.predominant": { 29 | "gte": 1, 30 | "lte": 3 31 | } 32 | } 33 | }] 34 | }} 35 | let(:non_nested_autocomplete) {{ 36 | common: { 37 | "school.name" => { 38 | query: "arizona", 39 | cutoff_frequency: 0.001, 40 | low_freq_operator: "and" 41 | } 42 | } 43 | }} 44 | let(:nested_match) {{ 45 | nested: { 46 | inner_hits: { 47 | size: DataMagic::MAX_NESTED_RESULT 48 | }, 49 | path: "2016.programs.cip_4_digit", 50 | query: { 51 | bool: { 52 | filter: [{ 53 | bool: { 54 | must: [{ 55 | match: { "2016.programs.cip_4_digit.code" => "1312" } 56 | }] 57 | } 58 | }] 59 | } 60 | } 61 | } 62 | }} 63 | 64 | shared_examples "builds a query" do 65 | it "with a query section" do 66 | expect(query_hash[:query]).to eql expected_query 67 | end 68 | it "with query metadata" do 69 | expect(query_hash.reject { |k, _| k == :query }).to eql nested_meta 70 | end 71 | end 72 | 73 | describe "appropriately combines queries for nested and non-nested datatypes" do 74 | context "both queries are match queries" do 75 | subject {{ 76 | "2016.programs.cip_4_digit.code" => "1312", 77 | "id" => "243744" 78 | }} 79 | 80 | let(:expected_query) {{ 81 | bool: { 82 | must: { match: { "id" => "243744" }}, 83 | filter: nested_match 84 | } 85 | }} 86 | 87 | it_correctly "builds a query" 88 | end 89 | 90 | context "non-nested query is an autocomplete query and nested query is a match query" do 91 | subject {{ 92 | "2016.programs.cip_4_digit.code" => "1312", 93 | "school.name" => "arizona" 94 | }} 95 | 96 | let(:expected_query) {{ 97 | bool: { 98 | must: non_nested_autocomplete, 99 | filter: nested_match 100 | } 101 | }} 102 | 103 | it_correctly "builds a query" 104 | 105 | 106 | end 107 | 108 | context "non-nested query is a range query and nested query is a match query" do 109 | subject {{ 110 | "2016.programs.cip_4_digit.code" => "1312", 111 | "school.degrees_awarded.predominant__range" => "1..3" 112 | }} 113 | 114 | let(:expected_query) {{ 115 | bool: { 116 | filter: [ 117 | non_nested_range, 118 | nested_match 119 | ] 120 | } 121 | }} 122 | 123 | it_correctly "builds a query" 124 | end 125 | 126 | context "query includes non-nested range query and autocomplete query and nested query is a match query" do 127 | subject {{ 128 | "2016.programs.cip_4_digit.code" => "1312", 129 | "school.degrees_awarded.predominant__range" => "1..3", 130 | "school.name" => "arizona" 131 | }} 132 | 133 | let(:expected_query) {{ 134 | bool: { 135 | filter: [ 136 | non_nested_range, 137 | nested_match, 138 | { 139 | bool: { 140 | must: non_nested_autocomplete 141 | } 142 | } 143 | ] 144 | } 145 | }} 146 | 147 | it_correctly "builds a query" 148 | end 149 | end 150 | end -------------------------------------------------------------------------------- /spec/lib/data_magic/search_name_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | require 'csv' 4 | 5 | describe "DataMagic intuitive search" do 6 | 7 | before
:example do 8 | DataMagic.destroy 9 | ENV['DATA_PATH'] = './spec/fixtures/school_names' 10 | DataMagic.init(load_now: true) 11 | end 12 | after :example do 13 | DataMagic.destroy 14 | end 15 | 16 | RSpec.configure do |c| 17 | c.alias_it_should_behave_like_to :it_correctly, 'correctly:' 18 | end 19 | 20 | let(:expected_meta) {{"metadata"=>{"total"=>1, "page"=>0, "per_page"=>20}}} 21 | let(:expected_match) { "" } 22 | let(:response) { DataMagic.search( 23 | {'school.name' => subject}, fields:['school.name']) } 24 | 25 | context "full request" do 26 | let(:response) { DataMagic.search({id: 1}) } 27 | let(:expected_match) { [{"id"=>"1", "school"=>{"state"=>"AL", "name"=>"Stillman College"}}]} 28 | it "provides expected document" do 29 | expect(response['results']).to eql expected_match 30 | end 31 | end 32 | 33 | context "sort" do 34 | shared_examples "returns" do 35 | it "sorted results " do 36 | expect(response['results'].map { |i| i['school.name'] }) 37 | .to eql expected_match 38 | end 39 | end 40 | 41 | context "with list of names" do 42 | let(:response) { DataMagic.search({}, fields:['school.name'], 43 | sort: 'school.name') } 44 | # fields:['name'], 45 | let(:expected_match) { 46 | csv_path = File.expand_path("../../fixtures/school_names/school_names.csv", __dir__) 47 | data = CSV.read(csv_path).slice(1..-1) 48 | data.map { |row| row[2] } 49 | .sort.slice(0,20) 50 | } 51 | it_correctly "returns" 52 | end 53 | 54 | end 55 | 56 | context "basic search" do 57 | shared_examples "finds" do 58 | it "correct results " do 59 | expect(response['results'] 60 | .map { |i| i['school.name'] } 61 | .sort ) 62 | .to eql expected_match 63 | end 64 | it "correct metadata" do 65 | expect(response.reject { |k, _| k == 'results' }).to eql expected_meta 66 | end 67 | end 68 | 69 | context "for exact match" do 70 | subject { 'New York University' } 71 | let(:expected_match) { ['New York University'] } 72 | it_correctly "finds" 73 | end 74 | context "for exact match (case insensitive)" do 75 | subject { 'new YORK UniverSity' } 76 | let(:expected_match) { ['New York University'] } 77 | it_correctly "finds" 78 | end 79 | 80 | context "for exact match (case insensitive)" do 81 | subject { 'new YORK UniverSity' } 82 | let(:expected_match) { ['New York University'] } 83 | it_correctly "finds" 84 | end 85 | 86 | context "by prefix" do 87 | subject { 'Still' } 88 | let(:expected_match) { ['Stillman College'] } 89 | it_correctly "finds" 90 | end 91 | 92 | context "by prefix (case insensitive)" do 93 | subject { 'still' } 94 | let(:expected_match) { ['Stillman College'] } 95 | it_correctly "finds" 96 | end 97 | 98 | context "by prefix in the middle of the name" do 99 | subject { 'Phoenix' } 100 | let(:expected_meta) {{"metadata"=>{"total"=>3, "page"=>0, "per_page"=>20}}} 101 | let(:expected_match) { ['Phoenix College', 102 | 'University of Phoenix-Online Campus', 103 | "University of Phoenix-Phoenix Campus"] } 104 | it_correctly "finds" 105 | end 106 | 107 | context "with words in the wrong order" do 108 | subject { 'University New York' } 109 | let(:expected_match) { ['New York University'] } 110 | it_correctly "finds" 111 | end 112 | 113 | context "partial word after dash" do 114 | subject { 'berk' } 115 | let(:expected_meta) {{"metadata"=>{"total"=>3, "page"=>0, "per_page"=>20}}} 116 | let(:expected_match) { ['Berk Trade and Business School', 117 | 'Berklee College of Music', 118 | 'University of California-Berkeley'] } 119 | it_correctly "finds" 120 | end 121 | 122 | context "words separated by dash" do 123 | 
subject { 'phoenix online' } 124 | let(:expected_match) { ['University of Phoenix-Online Campus'] } 125 | it_correctly "finds" 126 | end 127 | end 128 | # TODO 129 | # "pheonix" (a misspelling) should probably work 130 | # "phoenix college" should probably also return "university of phoenix" --- since college is a synonym for university 131 | 132 | end 133 | -------------------------------------------------------------------------------- /spec/lib/data_magic_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | require 'fixtures/data.rb' 4 | 5 | describe DataMagic do 6 | it "cleans up after itself" do 7 | DataMagic.init(load_now: true) 8 | DataMagic.destroy 9 | DataMagic.logger.info "just destroyed" 10 | #expect(DataMagic.client.indices.get(index: '_all')).to be_empty 11 | end 12 | 13 | describe '.es_field_types' do 14 | it 'returns the given fields with their specified type' do 15 | expect(described_class.es_field_types({ 'state' => 'string', land_area: 'string' })) 16 | .to eq("state" => { :type => "string" }, 17 | :land_area => { :type => "string" }) 18 | end 19 | 20 | context 'with custom type "literal"' do 21 | it 'returns string type with :index of "not_analyzed"' do 22 | expect(described_class.es_field_types({ 'state' => 'string', 'name' => 'literal' })) 23 | .to eq({"state"=>{:type=>"string"}, "name"=>{:type=>"string", :index=>"not_analyzed"}}) 24 | end 25 | end 26 | 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /spec/lib/expression/eval_spec.rb: -------------------------------------------------------------------------------- 1 | require 'expression/parser' 2 | require 'expression/eval' 3 | 4 | describe Expression::Eval do 5 | 6 | let(:parser) { Expression::Parser.new } 7 | let(:eval) { Expression::Eval.new } 8 | let(:values) {{ 'f' => 0, 't' => 1 }} 9 | 10 | it "simple 'or'" do 11 | expect( 12 | eval.apply(parser.parse('t or f'), variables: values) 13 | ).to eq(1) 14 | end 15 | 16 | describe "simple 'and'" do 17 | it "true and false" do 18 | expect( 19 | eval.apply(parser.parse('t and f'), variables: values) 20 | ).to eq(0) 21 | end 22 | 23 | it "false and true" do 24 | expect( 25 | eval.apply(parser.parse('f and t'), variables: values) 26 | ).to eq(0) 27 | end 28 | end 29 | 30 | it "multiple operands" do 31 | expect( 32 | eval.apply(parser.parse('f or f or t'), variables: values) 33 | ).to eq(1) 34 | end 35 | 36 | describe "parens" do 37 | it "nested 'or'" do 38 | expect( 39 | eval.apply(parser.parse('(f or t) and t'), variables: values) 40 | ).to eq(1) 41 | end 42 | 43 | it "nested 'and'" do 44 | expect( 45 | eval.apply(parser.parse('(f and t) or f'), variables: values) 46 | ).to eq(0) 47 | end 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /spec/lib/expression/parser_spec.rb: -------------------------------------------------------------------------------- 1 | require 'expression/parser' 2 | 3 | describe Expression::Parser do 4 | 5 | let(:parser) { Expression::Parser.new } 6 | describe 'vars' do 7 | it "parses one" do 8 | expect(parser.parse('one')).to eq(var: 'one') 9 | end 10 | it "preserves case" do 11 | expect(parser.parse('ONe')).to eq(var: 'ONe') 12 | end 13 | it "consumes trailing white space" do 14 | expect(parser.parse('one ')).to eq(var: 'one') 15 | end 16 | end 17 | 18 | it "parses or expression" do 19 | expect(parser.parse('apples or oranges')).to eq(
{or: {left: {var: "apples"}, right: {var: "oranges"}}} 21 | ) 22 | end 23 | 24 | it "parses and expression" do 25 | expect(parser.parse('apples and oranges')).to eq( 26 | {and: {left: {var: "apples"}, right: {var: "oranges"}}} 27 | ) 28 | end 29 | 30 | describe "parens" do 31 | it "nested 'or'" do 32 | expect(parser.parse('(apples or cranberries) and nuts')).to eq( 33 | {:and => { 34 | :left=>{:or=>{:left=>{:var=>"apples"}, :right=>{:var=>"cranberries"}}}, 35 | :right=>{:var=>"nuts"}}} 36 | ) 37 | end 38 | it "nested 'and'" do 39 | expect(parser.parse('(nuts and cranberries) or apples')).to eq( 40 | { or: { 41 | left: { and: { left: {var: "nuts"}, right: {var:"cranberries"}}}, 42 | right: { var: "apples" } 43 | } 44 | } 45 | ) 46 | end 47 | 48 | end 49 | 50 | end 51 | -------------------------------------------------------------------------------- /spec/lib/expression/variables_spec.rb: -------------------------------------------------------------------------------- 1 | require 'expression/parser' 2 | require 'expression/variables' 3 | 4 | describe Expression::Variables do 5 | 6 | let(:parser) { Expression::Parser.new } 7 | let(:variables) { Expression::Variables.new } 8 | it "gets one variable name" do 9 | expect(variables.apply(parser.parse('one'))).to eq(['one']) 10 | end 11 | it "preserves case " do 12 | expect(variables.apply(parser.parse('ONe'))).to eq(['ONe']) 13 | end 14 | it "multiple variables" do 15 | expect(variables.apply(parser.parse('fox or cow or goat'))).to eq(%w[fox cow goat]) 16 | end 17 | 18 | end 19 | -------------------------------------------------------------------------------- /spec/lib/expression_spec.rb: -------------------------------------------------------------------------------- 1 | require 'expression/expression' 2 | 3 | describe Expression do 4 | context "simple or expression" do 5 | it "can find variables" do 6 | expr = "ONE or TWO" 7 | expect(Expression.new(expr).variables).to eq(%w(ONE TWO)) 8 | end 9 | 10 | it "evaluates: 0 OR 1 to be 1" do 11 | expr = "f or t" 12 | values = {f:0, t:1} 13 | expect(Expression.new(expr).evaluate(values)).to eq(1) 14 | end 15 | 16 | it "evaluates: 1 OR 0 to be 1" do 17 | expr = "t or f" 18 | values = {f:0, t:1} 19 | expect(Expression.new(expr).evaluate(values)).to eq(1) 20 | end 21 | 22 | it "evaluates: 0 OR 0 to be 0" do 23 | expr = "f1 or f2" 24 | values = {f1:0, f2:0} 25 | expect(Expression.new(expr).evaluate(values)).to eq(0) 26 | end 27 | 28 | it "evaluates: 1 OR 1 to be 1" do 29 | expr = "t1 or t2" 30 | values = {t1:1, t2:1} 31 | expect(Expression.new(expr).evaluate(values)).to eq(1) 32 | end 33 | 34 | it "evaluates: 1 OR nil to be 1" do 35 | expr = "t1 or t2" 36 | values = {t1:1, t2:nil} 37 | expect(Expression.new(expr).evaluate(values)).to eq(1) 38 | end 39 | 40 | it "evaluates: 0 OR nil to be nil" do 41 | expr = "t1 or t2" 42 | values = {t1:0, t2:nil} 43 | expect(Expression.new(expr).evaluate(values)).to eq(nil) 44 | end 45 | 46 | it "evaluates: nil OR 0 to be 0" do 47 | expr = "t1 or t2" 48 | values = {t1:nil, t2:0} 49 | expect(Expression.new(expr).evaluate(values)).to eq(0) 50 | end 51 | 52 | it "evaluates: nil OR nil to be nil" do 53 | expr = "t1 or t2" 54 | values = {t1:nil, t2:nil} 55 | expect(Expression.new(expr).evaluate(values)).to eq(nil) 56 | end 57 | end 58 | end 59 | -------------------------------------------------------------------------------- /spec/lib/nested_hash_spec.rb: -------------------------------------------------------------------------------- 1 | require 'nested_hash' 2 | 3 | describe 
NestedHash do 4 | let(:input) { {"loc.x" => 1, "loc.y" => 2, "foo.a" => 10, "foo.b" => 20, "foo.c.baz" => 3,}} 5 | let(:expected) {{"loc" => {"x" => 1, "y" => 2}, "foo" => {"a" => 10, "b" => 20, "c" => { "baz" => 3}}}} 6 | let(:expected_updated) {{"loc" => {"x" => 1, "y" => 2}, "foo" => {"a" => 10, "b" => 20, "c" => { "baz" => "buzz"}}}} 7 | 8 | let(:symbol_keys) { {x:1, y:2}} 9 | let(:symbol_keys_result) { {'x' => 1, 'y' => 2}} 10 | 11 | 12 | it ".add creates nested hash elements for string keys with '.'" do 13 | result = NestedHash.new.add(input) 14 | expect(result).to eq(expected) 15 | end 16 | 17 | it "does no harm when initialized with an already nested hash" do 18 | expect(NestedHash.new(expected)).to eq(expected) 19 | end 20 | 21 | context "methods" do 22 | let(:result) { NestedHash.new(input) } 23 | it "can initialize with another Hash" do 24 | expect(result).to eq(expected) 25 | end 26 | 27 | it "can generate dotkeys" do 28 | expect(result.dotkeys.sort).to eq(input.keys.sort) 29 | end 30 | 31 | it "withdotkeys generates keys with '.'" do 32 | expect(result.withdotkeys).to eq(input) 33 | end 34 | 35 | it "dotkeys and withdotkeys have same order" do 36 | expect(result.withdotkeys.keys).to eq(result.dotkeys) 37 | end 38 | 39 | it "can set a nested key's value by a dotted-string" do 40 | result.dotkey_set('foo.c.baz', 'buzz') 41 | expect(result).to eq(expected_updated) 42 | end 43 | end 44 | 45 | 46 | it "turns symbol keys into simple strings" do 47 | result = NestedHash.new.add(symbol_keys) 48 | expect(result).to eq(symbol_keys_result) 49 | end 50 | 51 | context "deeply nested" do 52 | let(:input) { {"info.loc.x" => 0.11, "info.loc.y" => 0.222, "foo.a" => 10, "foo.b" => 20}} 53 | let(:expected) { {"info" => {"loc" => {"x" => 0.11, "y" => 0.222}}, "foo" => {"a" => 10, "b" => 20}}} 54 | 55 | it "creates nested hash elements for string keys with '.'" do 56 | result = NestedHash.new.add(input) 57 | expect(result).to eq(expected) 58 | end 59 | 60 | end 61 | 62 | end 63 | -------------------------------------------------------------------------------- /spec/lib/zipcode_spec.rb: -------------------------------------------------------------------------------- 1 | require 'zipcode/zipcode' 2 | 3 | describe Zipcode do 4 | it "gives a location based on zipcode" do 5 | location = Zipcode.latlon('94132') 6 | expect(location).to eq(lat: 37.7211, lon: -122.4754) 7 | end 8 | it "supports zipcode given as a number" do 9 | location = Zipcode.latlon(94132) 10 | expect(location).to eq(lat: 37.7211, lon: -122.4754) 11 | end 12 | 13 | describe '#valid?' do 14 | it "returns true if the zipcode is valid" do 15 | expect(Zipcode.valid? 94132).to eq(true) 16 | end 17 | it "returns false if the zipcode is invalid" do 18 | expect(Zipcode.valid? '00002').to eq(false) # passed as a string: a bare 00002 literal would be the octal integer 2 19 | end 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /spec/spec.rake: -------------------------------------------------------------------------------- 1 | begin 2 | require 'rspec/core/rake_task' 3 | 4 | spec_tasks = Dir['spec/*/'].each_with_object([]) do |d, result| 5 | result << File.basename(d) unless Dir["#{d}*"].empty?
6 | end 7 | 8 | spec_tasks.each do |folder| 9 | desc "Run the spec suite in #{folder}" 10 | RSpec::Core::RakeTask.new("spec:#{folder}") do |t| 11 | t.pattern = "./spec/#{folder}/**/*_spec.rb" 12 | t.rspec_opts = "--color" 13 | end 14 | end 15 | 16 | desc "Run complete application spec suite" 17 | RSpec::Core::RakeTask.new(:spec) 18 | rescue LoadError 19 | puts "RSpec is not part of this bundle, skipping specs." 20 | end 21 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'simplecov' 2 | SimpleCov.start 3 | 4 | ENV['DATA_PATH'] = nil 5 | ENV['RACK_ENV'] ||= 'test' 6 | RACK_ENV = ENV['RACK_ENV'] unless defined?(RACK_ENV) 7 | 8 | #require File.expand_path(File.dirname(__FILE__) + "/../config/boot") 9 | require_relative '../config/env.rb' 10 | Dir[File.expand_path(File.dirname(__FILE__) + "/../app/helpers/**/*.rb")].each(&method(:require)) 11 | 12 | RSpec.configure do |config| 13 | config.include Rack::Test::Methods 14 | 15 | config.before(:type => :feature) do 16 | # load the Padrino web app defined in app/app.rb 17 | require_relative '../config/boot' 18 | end 19 | config.before do 20 | ENV['DATA_PATH'] = nil 21 | end 22 | end 23 | 24 | # You can use this method to specify a custom Rack app 25 | # for rack-test to invoke: 26 | # 27 | # app OpenDataMaker::App 28 | # app OpenDataMaker::App.tap { |a| } 29 | # app(OpenDataMaker::App) do 30 | # set :foo, :bar 31 | # end 32 | # 33 | def app(app = nil, &blk) 34 | @app ||= block_given? ? app.instance_eval(&blk) : app 35 | @app ||= Padrino.application 36 | end 37 | -------------------------------------------------------------------------------- /spec/tasks/import_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'bundler/setup' 3 | require 'padrino-core/cli/rake' 4 | 5 | describe 'Elasticsearch index management rake task' do 6 | before do 7 | PadrinoTasks.init 8 | DataMagic.init(load_now: true) 9 | end 10 | 11 | after do 12 | DataMagic.destroy 13 | end 14 | 15 | context "imports" do 16 | it "default sample-data" do 17 | ENV['DATA_PATH'] = nil 18 | expect { Rake::Task['import'].invoke }.not_to raise_exception 19 | end 20 | 21 | it "correct configuration" do 22 | dir_path = './spec/fixtures/import_with_dictionary' 23 | ENV['DATA_PATH'] = dir_path 24 | expect { Rake::Task['import'].invoke }.not_to raise_exception 25 | expect(DataMagic.config.api_endpoint_names).to eq(['cities']) 26 | end 27 | 28 | end 29 | 30 | end 31 | -------------------------------------------------------------------------------- /tasks/delta.rake: -------------------------------------------------------------------------------- 1 | require_relative '../lib/data_magic.rb' 2 | 3 | desc 'Delta-update the current ES index with a delta data file. ARG 1 must be the title as specified in data.yaml, e.g., Most-Recent-Cohorts-All-Data-Elements.csv. ARG 2 is the filename of the new delta file nested in a "/delta/" subdirectory.
USAGE: delta[ORIGINAL_FILENAME.csv,DELTA_FILENAME.csv]' 4 | task :delta, [:original, :update] => :environment do |t, args| 5 | options = {} 6 | options[:delta_original] = args[:original] || 'Most-Recent-Cohorts-All-Data-Elements.csv' 7 | options[:delta_update] = args[:update] 8 | DataMagic.import_with_delta(options) 9 | end 10 | -------------------------------------------------------------------------------- /tasks/es.rake: -------------------------------------------------------------------------------- 1 | require_relative '../lib/data_magic.rb' 2 | 3 | namespace :es do 4 | desc "delete elasticsearch index (_all for all)" 5 | task :delete, [:index_name] => :environment do |t, args| 6 | DataMagic.client.indices.delete(index: args[:index_name]) 7 | end 8 | 9 | desc "list elasticsearch indices" 10 | task :list => :environment do |t, args| 11 | result = DataMagic.client.indices.get(index: '_all').keys 12 | puts result.join("\n") 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /tasks/import.rake: -------------------------------------------------------------------------------- 1 | require_relative '../lib/data_magic.rb' 2 | require 'ruby-prof' 3 | 4 | desc "Import files from DATA_PATH via rake import; optionally run rake import[filename.csv] to restart the import from a specific file, or rake import[nil,profile=true] for profiling output." 5 | task :import, [:continue, :profile] => :environment do |t, args| 6 | options = {} 7 | options[:continue] = args[:continue] 8 | start_time = Time.now 9 | RubyProf.start if args[:profile] 10 | 11 | DataMagic.import_with_dictionary(options) 12 | 13 | if args[:profile] 14 | result = RubyProf.stop 15 | end_time = Time.now 16 | puts "indexing complete: #{distance_of_time_in_words(end_time, start_time)}" 17 | puts "duration: #{end_time - start_time}" 18 | 19 | printer = RubyProf::MultiPrinter.new(result) 20 | printer.print(path: ".", profile: "profile", min_percent: 2) 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/.last_run.json: -------------------------------------------------------------------------------- 1 | { 2 | "result": { 3 | "covered_percent": 40.87 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/.resultset.json.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/.resultset.json.lock -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/border.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/border.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/controls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/controls.png --------------------------------------------------------------------------------
/test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/loading.gif -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/loading_background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/loading_background.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/favicon_green.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/favicon_green.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/favicon_red.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/favicon_red.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/favicon_yellow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/favicon_yellow.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/loading.gif -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/magnify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/magnify.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_flat_75_ffffff_40x100.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_flat_75_ffffff_40x100.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_65_ffffff_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_65_ffffff_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_75_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_75_dadada_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- 
/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_2e83ff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_2e83ff_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_454545_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_454545_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_888888_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_888888_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_cd0a0a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_cd0a0a_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/.last_run.json: -------------------------------------------------------------------------------- 1 | { 2 | "result": { 3 | "covered_percent": 45.3 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/.resultset.json.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/.resultset.json.lock -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/border.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/border.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/controls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/controls.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/loading.gif 
-------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/loading_background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/loading_background.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/favicon_green.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/favicon_green.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/favicon_red.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/favicon_red.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/favicon_yellow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/favicon_yellow.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/loading.gif -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/magnify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/magnify.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_flat_75_ffffff_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_flat_75_ffffff_40x100.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_65_ffffff_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_65_ffffff_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_75_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_75_dadada_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_2e83ff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_2e83ff_256x240.png -------------------------------------------------------------------------------- 
/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_454545_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_454545_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_888888_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_888888_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_cd0a0a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_cd0a0a_256x240.png -------------------------------------------------------------------------------- /wait-for-it.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Use this script to test if a given TCP host/port are available 3 | # https://github.com/vishnubob/wait-for-it 4 | 5 | WAITFORIT_cmdname=${0##*/} 6 | 7 | echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi } 8 | 9 | usage() 10 | { 11 | cat << USAGE >&2 12 | Usage: 13 | $WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args] 14 | -h HOST | --host=HOST Host or IP under test 15 | -p PORT | --port=PORT TCP port under test 16 | Alternatively, you specify the host and port as host:port 17 | -s | --strict Only execute subcommand if the test succeeds 18 | -q | --quiet Don't output any status messages 19 | -t TIMEOUT | --timeout=TIMEOUT 20 | Timeout in seconds, zero for no timeout 21 | -- COMMAND ARGS Execute command with args after the test finishes 22 | USAGE 23 | exit 1 24 | } 25 | 26 | wait_for() 27 | { 28 | if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then 29 | echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" 30 | else 31 | echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout" 32 | fi 33 | WAITFORIT_start_ts=$(date +%s) 34 | while : 35 | do 36 | if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then 37 | nc -z $WAITFORIT_HOST $WAITFORIT_PORT 38 | WAITFORIT_result=$? 39 | else 40 | (echo -n > /dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT) >/dev/null 2>&1 41 | WAITFORIT_result=$? 
42 | fi 43 | if [[ $WAITFORIT_result -eq 0 ]]; then 44 | WAITFORIT_end_ts=$(date +%s) 45 | echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds" 46 | break 47 | fi 48 | sleep 1 49 | done 50 | return $WAITFORIT_result 51 | } 52 | 53 | wait_for_wrapper() 54 | { 55 | # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692 56 | if [[ $WAITFORIT_QUIET -eq 1 ]]; then 57 | timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --quiet --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & 58 | else 59 | timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & 60 | fi 61 | WAITFORIT_PID=$! 62 | trap "kill -INT -$WAITFORIT_PID" INT 63 | wait $WAITFORIT_PID 64 | WAITFORIT_RESULT=$? 65 | if [[ $WAITFORIT_RESULT -ne 0 ]]; then 66 | echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" 67 | fi 68 | return $WAITFORIT_RESULT 69 | } 70 | 71 | # process arguments 72 | while [[ $# -gt 0 ]] 73 | do 74 | case "$1" in 75 | *:* ) 76 | WAITFORIT_hostport=(${1//:/ }) 77 | WAITFORIT_HOST=${WAITFORIT_hostport[0]} 78 | WAITFORIT_PORT=${WAITFORIT_hostport[1]} 79 | shift 1 80 | ;; 81 | --child) 82 | WAITFORIT_CHILD=1 83 | shift 1 84 | ;; 85 | -q | --quiet) 86 | WAITFORIT_QUIET=1 87 | shift 1 88 | ;; 89 | -s | --strict) 90 | WAITFORIT_STRICT=1 91 | shift 1 92 | ;; 93 | -h) 94 | WAITFORIT_HOST="$2" 95 | if [[ $WAITFORIT_HOST == "" ]]; then break; fi 96 | shift 2 97 | ;; 98 | --host=*) 99 | WAITFORIT_HOST="${1#*=}" 100 | shift 1 101 | ;; 102 | -p) 103 | WAITFORIT_PORT="$2" 104 | if [[ $WAITFORIT_PORT == "" ]]; then break; fi 105 | shift 2 106 | ;; 107 | --port=*) 108 | WAITFORIT_PORT="${1#*=}" 109 | shift 1 110 | ;; 111 | -t) 112 | WAITFORIT_TIMEOUT="$2" 113 | if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi 114 | shift 2 115 | ;; 116 | --timeout=*) 117 | WAITFORIT_TIMEOUT="${1#*=}" 118 | shift 1 119 | ;; 120 | --) 121 | shift 122 | WAITFORIT_CLI=("$@") 123 | break 124 | ;; 125 | --help) 126 | usage 127 | ;; 128 | *) 129 | echoerr "Unknown argument: $1" 130 | usage 131 | ;; 132 | esac 133 | done 134 | 135 | if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then 136 | echoerr "Error: you need to provide a host and port to test." 137 | usage 138 | fi 139 | 140 | WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15} 141 | WAITFORIT_STRICT=${WAITFORIT_STRICT:-0} 142 | WAITFORIT_CHILD=${WAITFORIT_CHILD:-0} 143 | WAITFORIT_QUIET=${WAITFORIT_QUIET:-0} 144 | 145 | # Check to see if timeout is from busybox? 146 | WAITFORIT_TIMEOUT_PATH=$(type -p timeout) 147 | WAITFORIT_TIMEOUT_PATH=$(realpath $WAITFORIT_TIMEOUT_PATH 2>/dev/null || readlink -f $WAITFORIT_TIMEOUT_PATH) 148 | 149 | WAITFORIT_BUSYTIMEFLAG="" 150 | if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then 151 | WAITFORIT_ISBUSY=1 152 | # Check if busybox timeout uses -t flag 153 | # (recent Alpine versions don't support -t anymore) 154 | if timeout &>/dev/stdout | grep -q -e '-t '; then 155 | WAITFORIT_BUSYTIMEFLAG="-t" 156 | fi 157 | else 158 | WAITFORIT_ISBUSY=0 159 | fi 160 | 161 | if [[ $WAITFORIT_CHILD -gt 0 ]]; then 162 | wait_for 163 | WAITFORIT_RESULT=$? 164 | exit $WAITFORIT_RESULT 165 | else 166 | if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then 167 | wait_for_wrapper 168 | WAITFORIT_RESULT=$? 169 | else 170 | wait_for 171 | WAITFORIT_RESULT=$? 
172 | fi 173 | fi 174 | 175 | if [[ $WAITFORIT_CLI != "" ]]; then 176 | if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then 177 | echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess" 178 | exit $WAITFORIT_RESULT 179 | fi 180 | exec "${WAITFORIT_CLI[@]}" 181 | else 182 | exit $WAITFORIT_RESULT 183 | fi 184 | --------------------------------------------------------------------------------
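Usage note (illustrative): wait-for-it.sh is the piece that lets a container or CI step block until Elasticsearch is reachable before running anything that needs the index. The invocation below is a sketch only; the host, port, timeout, and wrapped command are assumptions for illustration, not values taken from this repository's CI or docker-compose configuration:

# Block until Elasticsearch answers on localhost:9200, waiting up to 60 seconds;
# with --strict, the wrapped command is not executed if the wait times out.
./wait-for-it.sh localhost:9200 --timeout=60 --strict -- bundle exec rake spec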