├── .circleci └── config.yml ├── .components ├── .dockerignore ├── .github └── workflows │ └── ruby.yml ├── .gitignore ├── .rubocop.yml ├── .ruby-gemset ├── .ruby-version ├── API.md ├── CONTRIBUTING.md ├── DICTIONARY.md ├── Dockerfile ├── Gemfile ├── Gemfile.lock ├── INSTALL.md ├── LICENSE.md ├── Procfile ├── README.md ├── Rakefile ├── app ├── app.rb ├── controllers.rb ├── index_app.rb ├── stylesheets │ └── application.sass └── views │ ├── category.liquid │ ├── home.liquid │ └── layouts │ └── application.erb ├── bin └── open-data-maker ├── config.ru ├── config ├── apps.rb ├── boot.rb ├── env.rb ├── puma.rb └── unicorn.rb ├── docker-compose.yml ├── lib ├── data_magic.rb ├── data_magic │ ├── category.rb │ ├── config.rb │ ├── error_checker.rb │ ├── example.rb │ ├── index.rb │ ├── index │ │ ├── builder_data.rb │ │ ├── document.rb │ │ ├── document_builder.rb │ │ ├── event_logger.rb │ │ ├── importer.rb │ │ ├── output.rb │ │ ├── repository.rb │ │ ├── row_bulk_importer.rb │ │ ├── row_importer.rb │ │ ├── row_map.rb │ │ └── super_client.rb │ ├── nested_query_builder.rb │ └── query_builder.rb ├── expression │ ├── eval.rb │ ├── expression.rb │ ├── parser.rb │ └── variables.rb ├── nested_hash.rb ├── sass_initializer.rb └── zipcode │ ├── us_zipcodes.txt │ └── zipcode.rb ├── log └── .gitkeep ├── public ├── favicon.ico └── javascripts │ ├── application.js │ ├── jquery-ujs.js │ └── jquery.js ├── sample-data ├── cities100.csv └── data.yaml ├── script ├── bomstrip.sh ├── bootstrap ├── makeutf8.sh ├── s3config.rb ├── s3pull └── s3push ├── spec ├── features │ ├── api_spec.rb │ └── web_spec.rb ├── fixtures │ ├── bom │ │ ├── bom.csv │ │ └── data.yaml │ ├── calculated_columns │ │ ├── data.yaml │ │ └── schools.csv │ ├── cities_with_yml │ │ ├── cities50.csv │ │ ├── cities51-100.csv │ │ ├── data.yml │ │ └── more.csv │ ├── cities_without_yml │ │ ├── cities50.csv │ │ ├── cities51-100.csv │ │ └── more.csv │ ├── data.rb │ ├── geo │ │ ├── data.yaml │ │ └── places.csv │ ├── geo_no_files │ │ └── data.yaml │ ├── import_with_dictionary │ │ ├── cities50.csv │ │ ├── cities51-100.csv │ │ ├── data.yaml │ │ └── more.csv │ ├── import_with_errors │ │ ├── cities4.csv │ │ └── data.yaml │ ├── import_with_null_value │ │ ├── data.yaml │ │ └── null_values.csv │ ├── import_with_options │ │ ├── cities4.csv │ │ ├── data.yaml │ │ └── more_cities.csv │ ├── invalid_utf8.csv │ ├── minimal │ │ └── data.yaml │ ├── nested2 │ │ ├── data.yaml │ │ └── school2013.csv │ ├── nested_data_type │ │ └── data.yml │ ├── nested_delta_files │ │ ├── data.yaml │ │ ├── delta │ │ │ └── latest-school-data_update1.csv │ │ ├── latest-school-data.csv │ │ ├── school2012.csv │ │ └── school2013.csv │ ├── nested_files │ │ ├── data.yaml │ │ ├── school-data.csv │ │ ├── school2011.csv │ │ ├── school2012.csv │ │ └── school2013.csv │ ├── numeric_data │ │ └── data.yaml │ ├── sample-data │ │ ├── cities100.csv │ │ └── data.yaml │ ├── school_names │ │ ├── data.yaml │ │ └── school_names.csv │ ├── schools │ │ ├── data.yaml │ │ └── schools.csv │ └── types │ │ ├── data.yaml │ │ └── places.csv ├── lib │ ├── data_magic │ │ ├── calculated_columns_spec.rb │ │ ├── config_field_types_spec.rb │ │ ├── config_spec.rb │ │ ├── create_index_spec.rb │ │ ├── error_checker_spec.rb │ │ ├── example_spec.rb │ │ ├── import_csv_spec.rb │ │ ├── import_with_delta_file_spec.rb │ │ ├── import_with_dictionary_spec.rb │ │ ├── import_with_nested_files_spec.rb │ │ ├── import_without_data_yaml_spec.rb │ │ ├── index │ │ │ ├── document_builder_spec.rb │ │ │ ├── document_spec.rb │ │ │ ├── event_logger_spec.rb │ │ │ 
├── importer_spec.rb │ │ │ └── repository_spec.rb │ │ ├── name_type_spec.rb │ │ ├── nested_and_non_nested_combos_spec.rb │ │ ├── nested_data_type_spec.rb │ │ ├── query_builder_spec.rb │ │ ├── search_name_spec.rb │ │ └── search_spec.rb │ ├── data_magic_spec.rb │ ├── expression │ │ ├── eval_spec.rb │ │ ├── parser_spec.rb │ │ └── variables_spec.rb │ ├── expression_spec.rb │ ├── nested_hash_spec.rb │ └── zipcode_spec.rb ├── spec.rake ├── spec_helper.rb └── tasks │ └── import_spec.rb ├── tasks ├── delta.rake ├── es.rake └── import.rake ├── test_coverage_snapshots ├── 10_3_19 │ ├── .last_run.json │ ├── .resultset.json │ ├── .resultset.json.lock │ ├── assets │ │ └── 0.10.2 │ │ │ ├── application.css │ │ │ ├── application.js │ │ │ ├── colorbox │ │ │ ├── border.png │ │ │ ├── controls.png │ │ │ ├── loading.gif │ │ │ └── loading_background.png │ │ │ ├── favicon_green.png │ │ │ ├── favicon_red.png │ │ │ ├── favicon_yellow.png │ │ │ ├── loading.gif │ │ │ ├── magnify.png │ │ │ └── smoothness │ │ │ └── images │ │ │ ├── ui-bg_flat_0_aaaaaa_40x100.png │ │ │ ├── ui-bg_flat_75_ffffff_40x100.png │ │ │ ├── ui-bg_glass_55_fbf9ee_1x400.png │ │ │ ├── ui-bg_glass_65_ffffff_1x400.png │ │ │ ├── ui-bg_glass_75_dadada_1x400.png │ │ │ ├── ui-bg_glass_75_e6e6e6_1x400.png │ │ │ ├── ui-bg_glass_95_fef1ec_1x400.png │ │ │ ├── ui-bg_highlight-soft_75_cccccc_1x100.png │ │ │ ├── ui-icons_222222_256x240.png │ │ │ ├── ui-icons_2e83ff_256x240.png │ │ │ ├── ui-icons_454545_256x240.png │ │ │ ├── ui-icons_888888_256x240.png │ │ │ └── ui-icons_cd0a0a_256x240.png │ └── index.html └── 3_17_20 │ ├── .last_run.json │ ├── .resultset.json │ ├── .resultset.json.lock │ ├── assets │ └── 0.10.2 │ │ ├── application.css │ │ ├── application.js │ │ ├── colorbox │ │ ├── border.png │ │ ├── controls.png │ │ ├── loading.gif │ │ └── loading_background.png │ │ ├── favicon_green.png │ │ ├── favicon_red.png │ │ ├── favicon_yellow.png │ │ ├── loading.gif │ │ ├── magnify.png │ │ └── smoothness │ │ └── images │ │ ├── ui-bg_flat_0_aaaaaa_40x100.png │ │ ├── ui-bg_flat_75_ffffff_40x100.png │ │ ├── ui-bg_glass_55_fbf9ee_1x400.png │ │ ├── ui-bg_glass_65_ffffff_1x400.png │ │ ├── ui-bg_glass_75_dadada_1x400.png │ │ ├── ui-bg_glass_75_e6e6e6_1x400.png │ │ ├── ui-bg_glass_95_fef1ec_1x400.png │ │ ├── ui-bg_highlight-soft_75_cccccc_1x100.png │ │ ├── ui-icons_222222_256x240.png │ │ ├── ui-icons_2e83ff_256x240.png │ │ ├── ui-icons_454545_256x240.png │ │ ├── ui-icons_888888_256x240.png │ │ └── ui-icons_cd0a0a_256x240.png │ └── index.html └── wait-for-it.sh /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | shared: &shared 4 | working_directory: ~/repo 5 | steps: 6 | - checkout 7 | 8 | # Restore bundle cache 9 | - restore_cache: 10 | keys: 11 | - bundle-cache-{{ checksum "Gemfile.lock" }} 12 | - bundle-cache- 13 | 14 | - run: 15 | name: Bundle Install 16 | command: bundle install --path vendor/bundle 17 | 18 | # Store bundle cache 19 | - save_cache: 20 | key: bundle-cache-{{ checksum "Gemfile.lock" }} 21 | paths: 22 | - vendor/bundle 23 | 24 | - run: 25 | name: Wait for Elasticsearch 26 | command: | 27 | while ! 
curl -sS --fail http://localhost:9200 > /dev/null 2>&1; do 28 | sleep 5 29 | done 30 | 31 | - run: 32 | name: Create Test Directory 33 | command: mkdir ~/rspec 34 | 35 | - run: 36 | name: RSpec 37 | command: bundle exec rspec --format progress --format RspecJunitFormatter -o ~/rspec/rspec.xml 38 | 39 | # Save test results for timing analysis 40 | - store_test_results: 41 | path: ~/rspec 42 | 43 | jobs: 44 | test-odm: 45 | <<: *shared 46 | docker: 47 | - image: circleci/ruby:2.6.6 48 | environment: 49 | BUNDLE_JOBS: 3 50 | BUNDLE_RETRY: 3 51 | BUNDLE_PATH: vendor/bundle 52 | RACK_ENV: test 53 | - image: elasticsearch:2.4.4 54 | environment: 55 | ES_JAVA_OPTS: "-Dmapper.allow_dots_in_name=true" 56 | 57 | 58 | workflows: 59 | version: 2 60 | build: 61 | jobs: 62 | - test-odm 63 | -------------------------------------------------------------------------------- /.components: -------------------------------------------------------------------------------- 1 | --- 2 | :orm: none 3 | :test: rspec 4 | :mock: none 5 | :script: jquery 6 | :renderer: liquid 7 | :stylesheet: sass 8 | :namespace: OpenDataMaker 9 | :migration_format: number 10 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .dockerignore 2 | real-data -------------------------------------------------------------------------------- /.github/workflows/ruby.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake 6 | # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby 7 | 8 | name: Ruby 9 | env: 10 | ES_JAVA_OPTS: "-Dmapper.allow_dots_in_name=true" 11 | BUNDLE_JOBS: 3 12 | BUNDLE_RETRY: 3 13 | BUNDLE_PATH: vendor/bundle 14 | RACK_ENV: test 15 | on: 16 | push: 17 | branches: [ dev, staging ] 18 | pull_request: 19 | branches: [ dev ] 20 | 21 | jobs: 22 | test: 23 | 24 | runs-on: ubuntu-18.04 25 | 26 | steps: 27 | - uses: actions/checkout@v2 28 | - name: Set up Ruby 29 | uses: ruby/setup-ruby@v1 30 | with: 31 | ruby-version: 2.6.6 32 | 33 | - name: Setup ElasticSearch 34 | uses: getong/elasticsearch-action@v1.2 35 | with: 36 | elasticsearch version: 2.4.4 37 | - uses: actions/cache@v2 38 | with: 39 | path: vendor/bundle 40 | key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }} 41 | restore-keys: | 42 | ${{ runner.os }}-gems- 43 | - name: Bundle install 44 | run: | 45 | bundle config path vendor/bundle 46 | bundle install --jobs 4 --retry 3 47 | - name: Wait for Elasticsearch 48 | run: | 49 | while ! 
curl -sS --fail http://localhost:9200 > /dev/null 2>&1; do 50 | sleep 5 51 | done 52 | 53 | - name: Create Test Directory 54 | run: mkdir ~/rspec 55 | 56 | - name: RSpec 57 | run: bundle exec rspec --format progress --format RspecJunitFormatter -o ~/rspec/rspec.xml 58 | 59 | - name: Save assets 60 | uses: actions/upload-artifact@v2 61 | with: 62 | name: test-output 63 | path: ~/rspec 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | log/**/* 3 | log/*.log 4 | tmp/**/* 5 | vendor/gems/* 6 | !vendor/gems/cache/ 7 | .sass-cache/* 8 | db/*.db 9 | .*.sw* 10 | .env 11 | .*.env 12 | .cfignore 13 | cf-ssh.yml 14 | .vagrant 15 | .idea/ 16 | *profile* 17 | coverage/ 18 | .bundle 19 | .rakeTasks 20 | 21 | public/stylesheets/application.css* 22 | 23 | # expect people to put their own data in /data 24 | data 25 | 26 | # another commonly used data directory 27 | real-data 28 | 29 | # contains Google API tokens 30 | client_secret.json 31 | 32 | # Docker Data files 33 | ./docker/data/es* 34 | bin -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | AllCops: 2 | Exclude: 3 | - 'bin/**/*' 4 | - 'db/**/*' 5 | Metrics/LineLength: 6 | Enabled: false 7 | Style/CommentAnnotation: 8 | Enabled: false 9 | Style/Documentation: 10 | Enabled: false 11 | Style/DotPosition: 12 | Enabled: false 13 | Style/RedundantSelf: 14 | Enabled: false 15 | Style/StringLiterals: 16 | Enabled: false 17 | -------------------------------------------------------------------------------- /.ruby-gemset: -------------------------------------------------------------------------------- 1 | open-data-maker 2 | -------------------------------------------------------------------------------- /.ruby-version: -------------------------------------------------------------------------------- 1 | 2.6.6 2 | -------------------------------------------------------------------------------- /DICTIONARY.md: -------------------------------------------------------------------------------- 1 | # Data 2 | 3 | Details about the data are specified by `DATA_PATH/data.yaml`, 4 | where `DATA_PATH` is an environment variable that may be: 5 | 6 | * `s3://username:password@bucket_name/path` 7 | * `s3://bucket_name/path` 8 | * `s3://bucket_name` 9 | * a local path like: `./data` 10 | 11 | 12 | This file is loaded the first time it is needed and then stored in memory. The contents of `data.yaml` are stored as JSON in Elasticsearch in a single document of type `config` with id `1`. 13 | 14 | The version field of this document is checked at startup. If the new config has a new version, then we delete the whole index and re-index all of the files referred to in the `data.yaml` files section. 15 | 16 | If no data.yml or data.yaml file is found, then all CSV files in `DATA_PATH` will be loaded, and all fields in their headers will be used. 17 | 18 | For an example data file, visit https://collegescorecard.ed.gov/data/ and download the full data package. A data.yaml file will be included in the ZIP file download. 19 | 20 | # Dictionary Format 21 | 22 | The data dictionary format may be (optionally) specified in the `data.yaml` file. If unspecified, all columns are imported as strings.
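For orientation, here is a minimal sketch of a complete `data.yaml` that ties these pieces together. The values are illustrative only; see [the sample data.yaml](sample-data/data.yaml) for the authoritative schema, including the exact shape of the `files` entries.

```
version: example-v1       # bumping this triggers a full re-index at startup
index: city-data          # Elasticsearch index name
api: cities               # API endpoint name
files:
  - name: cities100.csv
unique:
  - name                  # column(s) used to build each document's id
dictionary:
  name:
    source: NAME
    type: string
    description: city name
```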
23 | 24 | ## Simple Data Types 25 | 26 | ``` 27 | dictionary: 28 | name: 29 | source: COLUMN_NAME 30 | type: integer 31 | description: explanation of where this data comes from and its meaning 32 | ``` 33 | 34 | In the above example: 35 | * `source:` is the name of the column in the csv. (This doesn't have to be all caps; we just find that to be common in government datasets.) 36 | * `type:` may be `integer`, `float`, or `string` 37 | * `description:` is a text description suitable for developer documentation or information provided to data analysts 38 | 39 | ## Calculated columns 40 | 41 | Optionally, you can add "columns" by calculating fields at import based on multiple csv columns. 42 | 43 | ``` 44 | academics.program.degree.health: 45 | calculate: CIP51ASSOC or CIP51BACHL 46 | type: integer 47 | description: Associate or Bachelor's degree in Health 48 | ``` 49 | 50 | Multiple operations are supported. In the following example, suppose the columns `apples`, `oranges`, and `plums` hold a `0` value when none are available and a `1` when they are; these values can then be combined with `or` to create a data field representing whether any of them were true. 51 | 52 | ``` 53 | fruit: 54 | calculate: apples or oranges or plums 55 | type: integer 56 | description: is there any fruit available? 57 | ``` 58 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ruby:2.6.6 2 | RUN mkdir /myapp 3 | WORKDIR /myapp 4 | COPY Gemfile /myapp/Gemfile 5 | COPY Gemfile.lock /myapp/Gemfile.lock 6 | RUN bundle install 7 | COPY . /myapp 8 | RUN chmod +x /myapp/wait-for-it.sh 9 | EXPOSE 3000 -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | ruby '2.6.6' 3 | 4 | # Distribute your app as a gem 5 | # gemspec 6 | 7 | # Server requirements 8 | # gem 'thin' # or mongrel 9 | # gem 'trinidad', :platform => 'jruby' 10 | 11 | # Optional JSON codec (faster performance) 12 | # gem 'oj' 13 | 14 | # open-data-maker requirements 15 | gem 'elasticsearch' 16 | gem 'typhoeus' 17 | gem 'stretchy' 18 | gem 'hashie' 19 | gem 'cf-app-utils' 20 | #gem 'unicorn' 21 | gem 'puma', '~>4.3' 22 | gem 'safe_yaml' 23 | gem 'aws-sdk', '~> 2' 24 | gem 'actionview', '6.0.3.4' 25 | gem 'dotenv' 26 | gem 'oj' 27 | gem 'parslet' 28 | gem 'parallel' 29 | 30 | # Project requirements 31 | gem 'rake' 32 | 33 | # Component requirements 34 | gem 'sass' 35 | gem 'liquify' 36 | gem 'liquid', '= 3.0.3' 37 | gem 'erubis' 38 | 39 | # Test requirements 40 | group :test do 41 | gem 'rspec' 42 | gem 'rspec-mocks' 43 | gem 'rack-test', :require => 'rack/test' 44 | gem 'rspec_junit_formatter' 45 | gem 'simplecov', require: false 46 | end 47 | 48 | group 'dev' do 49 | gem 'ruby-prof' 50 | 51 | end 52 | # Padrino Stable Gem 53 | gem 'padrino', '~> 0.15' 54 | 55 | gem 'pry', :group => ['development', 'test'] 56 | gem 'pry-byebug', :group => ['development', 'test'] 57 | gem 'newrelic_rpm' 58 | 59 | # Or Padrino Edge 60 | # gem 'padrino', :github => 'padrino/padrino-framework' 61 | 62 | # Or Individual Gems 63 | # %w(core support gen helpers cache mailer admin).each do |g| 64 | # gem 'padrino-' + g, '0.12.5' 65 | # end 66 | -------------------------------------------------------------------------------- /Gemfile.lock:
-------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | actionview (6.0.3.4) 5 | activesupport (= 6.0.3.4) 6 | builder (~> 3.1) 7 | erubi (~> 1.4) 8 | rails-dom-testing (~> 2.0) 9 | rails-html-sanitizer (~> 1.1, >= 1.2.0) 10 | activesupport (6.0.3.4) 11 | concurrent-ruby (~> 1.0, >= 1.0.2) 12 | i18n (>= 0.7, < 2) 13 | minitest (~> 5.1) 14 | tzinfo (~> 1.1) 15 | zeitwerk (~> 2.2, >= 2.2.2) 16 | aws-eventstream (1.1.0) 17 | aws-sdk (2.11.632) 18 | aws-sdk-resources (= 2.11.632) 19 | aws-sdk-core (2.11.632) 20 | aws-sigv4 (~> 1.0) 21 | jmespath (~> 1.0) 22 | aws-sdk-resources (2.11.632) 23 | aws-sdk-core (= 2.11.632) 24 | aws-sigv4 (1.2.2) 25 | aws-eventstream (~> 1, >= 1.0.2) 26 | builder (3.2.4) 27 | byebug (11.1.3) 28 | cf-app-utils (0.6) 29 | coderay (1.1.3) 30 | concurrent-ruby (1.1.7) 31 | crass (1.0.6) 32 | diff-lcs (1.4.4) 33 | docile (1.3.4) 34 | dotenv (2.7.6) 35 | elasticsearch (5.0.5) 36 | elasticsearch-api (= 5.0.5) 37 | elasticsearch-transport (= 5.0.5) 38 | elasticsearch-api (5.0.5) 39 | multi_json 40 | elasticsearch-transport (5.0.5) 41 | faraday 42 | multi_json 43 | erubi (1.10.0) 44 | erubis (2.7.0) 45 | ethon (0.12.0) 46 | ffi (>= 1.3.0) 47 | excon (0.78.1) 48 | faraday (1.3.0) 49 | faraday-net_http (~> 1.0) 50 | multipart-post (>= 1.2, < 3) 51 | ruby2_keywords 52 | faraday-net_http (1.0.0) 53 | ffi (1.14.2) 54 | hashie (4.1.0) 55 | i18n (1.8.7) 56 | concurrent-ruby (~> 1.0) 57 | jmespath (1.4.0) 58 | liquid (3.0.3) 59 | liquify (0.2.7) 60 | liquid (>= 2.2.2) 61 | loofah (2.8.0) 62 | crass (~> 1.0.2) 63 | nokogiri (>= 1.5.9) 64 | mail (2.7.1) 65 | mini_mime (>= 0.1.1) 66 | method_source (1.0.0) 67 | mime-types (3.3.1) 68 | mime-types-data (~> 3.2015) 69 | mime-types-data (3.2020.1104) 70 | mini_mime (1.0.2) 71 | mini_portile2 (2.5.0) 72 | minitest (5.14.3) 73 | moneta (1.1.1) 74 | multi_json (1.15.0) 75 | multipart-post (2.1.1) 76 | mustermann (1.1.1) 77 | ruby2_keywords (~> 0.0.1) 78 | newrelic_rpm (6.14.0) 79 | nio4r (2.5.4) 80 | nokogiri (1.11.1) 81 | mini_portile2 (~> 2.5.0) 82 | racc (~> 1.4) 83 | oj (3.10.18) 84 | padrino (0.15.0) 85 | padrino-admin (= 0.15.0) 86 | padrino-cache (= 0.15.0) 87 | padrino-core (= 0.15.0) 88 | padrino-gen (= 0.15.0) 89 | padrino-helpers (= 0.15.0) 90 | padrino-mailer (= 0.15.0) 91 | padrino-support (= 0.15.0) 92 | padrino-admin (0.15.0) 93 | padrino-core (= 0.15.0) 94 | padrino-helpers (= 0.15.0) 95 | padrino-cache (0.15.0) 96 | moneta (~> 1.1.0) 97 | padrino-core (= 0.15.0) 98 | padrino-helpers (= 0.15.0) 99 | padrino-core (0.15.0) 100 | padrino-support (= 0.15.0) 101 | sinatra (>= 2.0.0) 102 | thor (~> 0.18) 103 | padrino-gen (0.15.0) 104 | bundler (>= 1.0, < 3) 105 | padrino-core (= 0.15.0) 106 | padrino-helpers (0.15.0) 107 | i18n (>= 0.6.7, < 2) 108 | padrino-support (= 0.15.0) 109 | tilt (>= 1.4.1, < 3) 110 | padrino-mailer (0.15.0) 111 | mail (~> 2.5) 112 | mime-types (< 4) 113 | padrino-core (= 0.15.0) 114 | padrino-support (0.15.0) 115 | parallel (1.20.1) 116 | parslet (2.0.0) 117 | pry (0.13.1) 118 | coderay (~> 1.1) 119 | method_source (~> 1.0) 120 | pry-byebug (3.9.0) 121 | byebug (~> 11.0) 122 | pry (~> 0.13.0) 123 | puma (4.3.7) 124 | nio4r (~> 2.0) 125 | racc (1.5.2) 126 | rack (2.2.3) 127 | rack-protection (2.1.0) 128 | rack 129 | rack-test (1.1.0) 130 | rack (>= 1.0, < 3) 131 | rails-dom-testing (2.0.3) 132 | activesupport (>= 4.2.0) 133 | nokogiri (>= 1.6) 134 | rails-html-sanitizer (1.3.0) 135 | loofah (~> 2.3) 136 | rake (13.0.3) 137 | 
rb-fsevent (0.10.4) 138 | rb-inotify (0.10.1) 139 | ffi (~> 1.0) 140 | rspec (3.10.0) 141 | rspec-core (~> 3.10.0) 142 | rspec-expectations (~> 3.10.0) 143 | rspec-mocks (~> 3.10.0) 144 | rspec-core (3.10.1) 145 | rspec-support (~> 3.10.0) 146 | rspec-expectations (3.10.1) 147 | diff-lcs (>= 1.2.0, < 2.0) 148 | rspec-support (~> 3.10.0) 149 | rspec-mocks (3.10.1) 150 | diff-lcs (>= 1.2.0, < 2.0) 151 | rspec-support (~> 3.10.0) 152 | rspec-support (3.10.1) 153 | rspec_junit_formatter (0.4.1) 154 | rspec-core (>= 2, < 4, != 2.12.0) 155 | ruby-prof (1.4.2) 156 | ruby2_keywords (0.0.2) 157 | safe_yaml (1.0.5) 158 | sass (3.7.4) 159 | sass-listen (~> 4.0.0) 160 | sass-listen (4.0.0) 161 | rb-fsevent (~> 0.9, >= 0.9.4) 162 | rb-inotify (~> 0.9, >= 0.9.7) 163 | simplecov (0.21.1) 164 | docile (~> 1.1) 165 | simplecov-html (~> 0.11) 166 | simplecov_json_formatter (~> 0.1) 167 | simplecov-html (0.12.3) 168 | simplecov_json_formatter (0.1.2) 169 | sinatra (2.1.0) 170 | mustermann (~> 1.0) 171 | rack (~> 2.2) 172 | rack-protection (= 2.1.0) 173 | tilt (~> 2.0) 174 | stretchy (0.7.0) 175 | elasticsearch (~> 5.0) 176 | excon (~> 0.45) 177 | thor (0.20.3) 178 | thread_safe (0.3.6) 179 | tilt (2.0.10) 180 | typhoeus (1.4.0) 181 | ethon (>= 0.9.0) 182 | tzinfo (1.2.9) 183 | thread_safe (~> 0.1) 184 | zeitwerk (2.4.2) 185 | 186 | PLATFORMS 187 | ruby 188 | 189 | DEPENDENCIES 190 | actionview (= 6.0.3.4) 191 | aws-sdk (~> 2) 192 | cf-app-utils 193 | dotenv 194 | elasticsearch 195 | erubis 196 | hashie 197 | liquid (= 3.0.3) 198 | liquify 199 | newrelic_rpm 200 | oj 201 | padrino (~> 0.15) 202 | parallel 203 | parslet 204 | pry 205 | pry-byebug 206 | puma (~> 4.3) 207 | rack-test 208 | rake 209 | rspec 210 | rspec-mocks 211 | rspec_junit_formatter 212 | ruby-prof 213 | safe_yaml 214 | sass 215 | simplecov 216 | stretchy 217 | typhoeus 218 | 219 | RUBY VERSION 220 | ruby 2.6.6p146 221 | 222 | BUNDLED WITH 223 | 1.17.2 224 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Running Open Data Maker on your computer 2 | 3 | If you just want to install and run, then you can just download a 4 | [zip file](https://github.com/18F/open-data-maker/archive/master.zip). 5 | 6 | You will still need the dependencies below, but you don't need to 7 | clone the git repo for the source code. 8 | 9 | ## Docker 10 | Local development is possible using a [Docker] container + `docker-compose` 11 | 12 | ### First run 13 | - Start docker 14 | 15 | ``` 16 | docker-compose up 17 | ``` 18 | 19 | - Run `rake import` in the `web` container. 20 | 21 | ``` 22 | docker-compose run web rake import 23 | ``` 24 | 25 | - Application is available at `http://localhost:3000` 26 | 27 | ### After initial run 28 | 29 | ``` 30 | docker-compose up 31 | ``` 32 | 33 | - or run it detached with 34 | 35 | ``` 36 | docker-compose up -d 37 | ``` 38 | 39 | ## Install Prerequisites 40 | 41 | You can run our bootstrap script to make sure you have all the dependencies. 42 | It will also install and start up Elasticsearch: 43 | 44 | ``` 45 | script/bootstrap 46 | ``` 47 | 48 | To run Open Data Maker, you will need to have the following software installed on your computer: 49 | * [Elasticsearch] 2.4.4 50 | * [Ruby] 2.6.6 51 | 52 | **NOTE:** Open Data Maker indexing is currently very slow on ES 2.x; however, an index created on 1.x can be restored to 2.x. 53 | 54 | ### Mac OS X 55 | 56 | On a Mac, we recommend installing [RVM].
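If you set things up by hand instead of using `script/bootstrap`, a quick sanity check before continuing (a sketch; the expected versions come from this repo's configs):

```sh
ruby -v                           # expect ruby 2.6.6
curl -sS http://localhost:9200    # expect an Elasticsearch 2.4.4 JSON banner
```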
57 | 58 | If you are contributing to development, you will also need [Git]. 59 | 60 | 61 | ## Get the Source Code 62 | 63 | For development, [fork](http://help.github.com/fork-a-repo/) the repo 64 | first, then clone your fork. 65 | 66 | ``` 67 | git clone https://github.com/<your-username>/open-data-maker.git 68 | cd open-data-maker 69 | ``` 70 | 71 | ## Run the App 72 | 73 | 74 | ### Import the data 75 | 76 | To get started, you can import sample data with: 77 | 78 | `rake import` 79 | 80 | ### Start the app 81 | 82 | ``` 83 | padrino start 84 | ``` 85 | Go to: http://127.0.0.1:3000/ 86 | 87 | and you should see the text `Welcome to Open Data Maker` with a link to 88 | the API created by the [sample data](sample-data). 89 | 90 | You can verify that the import was successful by visiting 91 | http://127.0.0.1:3000/v1/cities?name=Cleveland. You should see something like: 92 | 93 | ```json 94 | { 95 | "state": "OH", 96 | "name": "Cleveland", 97 | "population": 396815, 98 | "land_area": 77.697, 99 | "location": { 100 | "lat": 41.478138, 101 | "lon": -81.679486 102 | } } 103 | ``` 104 | 105 | ### Custom Datasets 106 | 107 | While the app is running (or anytime), you can run `rake import`. For instance, if you had a `presidents/data.yaml` file, you would import 108 | it with: 109 | 110 | ```sh 111 | export DATA_PATH=presidents 112 | rake import 113 | # or, more succinctly: 114 | DATA_PATH=presidents rake import 115 | ``` 116 | 117 | To clear the data, assuming the data set had an index named "president-data": 118 | 119 | ``` 120 | rake es:delete[president-data] 121 | ``` 122 | 123 | You may alternatively delete all the indices (which could affect other apps if 124 | they are using your local Elasticsearch): 125 | 126 | ``` 127 | rake es:delete[_all] 128 | ``` 129 | 130 | The data directory can optionally include a file called `data.yaml` (see [the sample one](sample-data/data.yaml) for its schema) that references one or more `.csv` files and specifies data types, 131 | field name mapping, and other support data. 132 | 133 | 134 | ## Debugging 135 | 136 | Setting the `ES_DEBUG` environment variable will turn on a verbose tracer in the Elasticsearch client. 137 | 138 | Optional performance profiling for `rake import`: `rake import[profile=true]` 139 | 140 | 141 | ## Want to help? 142 | 143 | See [Contribution Guide](CONTRIBUTING.md) 144 | 145 | [Elasticsearch]: https://www.elastic.co/products/elasticsearch 146 | [Homebrew]: http://brew.sh/ 147 | [RVM]: https://github.com/wayneeseguin/rvm 148 | [rbenv]: https://github.com/sstephenson/rbenv 149 | [Ruby]: https://www.ruby-lang.org/en/ 150 | [Git]: https://git-scm.com/ 151 | [Docker]: https://www.docker.com/ 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | As a work of the United States Government, this project is in the 2 | public domain within the United States. 3 | 4 | Additionally, we waive copyright and related rights in the work 5 | worldwide through the CC0 1.0 Universal public domain dedication. 6 | 7 | ## CC0 1.0 Universal Summary 8 | 9 | This is a human-readable summary of the 10 | [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode).
11 | 12 | ### No Copyright 13 | 14 | The person who associated a work with this deed has dedicated the work to 15 | the public domain by waiving all of his or her rights to the work worldwide 16 | under copyright law, including all related and neighboring rights, to the 17 | extent allowed by law. 18 | 19 | You can copy, modify, distribute and perform the work, even for commercial 20 | purposes, all without asking permission. 21 | 22 | ### Other Information 23 | 24 | In no way are the patent or trademark rights of any person affected by CC0, 25 | nor are the rights that other persons may have in the work or in how the 26 | work is used, such as publicity or privacy rights. 27 | 28 | Unless expressly stated otherwise, the person who associated a work with 29 | this deed makes no warranties about the work, and disclaims liability for 30 | all uses of the work, to the fullest extent permitted by applicable law. 31 | When using or citing the work, you should not imply endorsement by the 32 | author or the affirmer. 33 | 34 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: bundle exec puma -C config/puma.rb 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Open Data Maker 2 | [![Build Status](https://circleci.com/gh/RTICWDT/open-data-maker/tree/dev.svg?style=svg)](https://circleci.com/gh/18F/open-data-maker/tree/dev) 3 | 4 | The goal of this project is to make it easy to turn a lot of potentially large 5 | csv files into open data via an API, along with the ability for people to download 6 | smaller csv files with a subset of the data. 7 | 8 | Preliminary research suggests that open data users (journalists and others) 9 | actually know how to work with spreadsheets really well, but a lot of the 10 | data sets that we have in government are huge. 11 | 12 | The first version of this project will allow us to host a website for an 13 | agency with a specific set of csv files, which are deployed with the app. 14 | This will allow us to deploy more quickly, since there will be a lower-risk 15 | security profile than if an agency could upload the CSV files (which might 16 | be a nice longer-term feature). 17 | 18 | 19 | ## Install and Run the App (as a developer) 20 | 21 | See our [Installation Guide](INSTALL.md) 22 | 23 | ## How this works 24 | 25 | By default, data will be loaded from /sample-data when you run `rake import` 26 | 27 | * [cities100.csv](sample-data/cities100.csv) - dataset of 100 most populous cities in the US 28 | * [data.yaml](sample-data/data.yaml) - configuration for 29 | * index name *city-data* 30 | * api endpoint name *cities* 31 | * how columns are mapped to fields in json output 32 | * data types 33 | * unique columns *name* 34 | 35 | When you run the app, you can query the dataset via json API, like: /cities?name=Chicago 36 | 37 | * http://localhost:3000/cities?name=Chicago 38 | * http://localhost:3000/cities?name=Chicago&state=IL 39 | * http://localhost:3000/cities?state=NY,MA 40 | * http://localhost:3000/cities?state=CA&fields=name,size 41 | 42 | To use your own data, you can set a different directory, for example: 43 | 44 | ``` 45 | export DATA_PATH='./data' 46 | ``` 47 | 48 | 1. Put csv files into /data 49 | 1. Import files from /data: ```rake import``` (or restart the app) 50 | 1.
There can be multiple files (must end in .csv) 51 | 1. Optional [data.yaml](sample-data/data.yaml) file that specifies index name, API endpoint, file list, and a dictionary of column -> field name mapping and types 52 | 1. Optionally import all the columns, not just the ones specified in the dictionary (see example: [import: all](spec/fixtures/import_with_options/data.yaml)) 53 | 1. If data.yaml is not provided, all files and fields will be imported, with the folder or bucket name used as the API endpoint (the name is 'slugified', with dashes replacing spaces) 54 | 1. API endpoint to get the data: `/<endpoint>?field_or_column_name=value` 55 | 56 | ## More Configuration Options 57 | 58 | Often while you are developing an API and data dictionary, 59 | it is helpful to include all the columns in the csv. If you add the following to 60 | data.yaml, the field names and types from the dictionary will be used and any 61 | unspecified columns will simply use the column name as the field name. 62 | 63 | ``` 64 | options: 65 | columns: all 66 | ``` 67 | 68 | You can use the dictionary to provide nice errors to developers who use the API. 69 | This can be used in conjunction with the above ```columns: all```: columns that 70 | are not referenced in the dictionary remain unsearchable, and 71 | referencing an unspecified field in a query causes an error to be 72 | reported. 73 | 74 | ``` 75 | options: 76 | search: dictionary_only 77 | ``` 78 | 79 | Also for debugging, you can limit the number of files that will be imported. This is helpful when the import process is time-consuming because you have many, many files; you can then test format changes with a subset of the files. 80 | 81 | ``` 82 | options: 83 | limit: 4 84 | ``` 85 | 86 | 87 | ### Acknowledgements 88 | Zipcode latitude and longitude provided by [GeoNames](http://www.geonames.org/) under a [Creative Commons Attribution 3.0 License](http://creativecommons.org/licenses/by/3.0/). 89 | 90 | ### Public domain 91 | 92 | Except as noted above, this project is in the worldwide [public domain](LICENSE.md). As stated in [CONTRIBUTING](CONTRIBUTING.md): 93 | 94 | > This project is in the public domain within the United States, and copyright and related rights in the work worldwide are waived through the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/). 95 | > 96 | > All contributions to this project will be released under the CC0 dedication. By submitting a pull request, you are agreeing to comply with this waiver of copyright interest. 97 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/setup' 2 | require 'padrino-core/cli/rake' 3 | 4 | 5 | task :default => :spec 6 | 7 | PadrinoTasks.init 8 | -------------------------------------------------------------------------------- /app/app.rb: -------------------------------------------------------------------------------- 1 | require 'csv' 2 | 3 | module OpenDataMaker 4 | class App < Padrino::Application 5 | register SassInitializer 6 | register Padrino::Helpers 7 | 8 | # This app is stateless and session cookies prevent caching of API responses 9 | disable :sessions 10 | 11 | # This app has no sensitive bits and csrf protection requires sessions 12 | disable :protect_from_csrf 13 | 14 | if ENV['DATA_AUTH'] and not ENV['DATA_AUTH'].empty?
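# DATA_AUTH is expected to hold HTTP basic-auth credentials as a single
# "username,password" string, e.g. DATA_AUTH="admin,secret" (hypothetical
# values); it is split into a username and password just below.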
15 | auth = ENV['DATA_AUTH'] 16 | authorized_user, authorized_pass = auth.split(',') 17 | use Rack::Auth::Basic, "Restricted Area" do |username, password| 18 | username == authorized_user and password == authorized_pass 19 | end 20 | end 21 | 22 | ## app setup 23 | if ENV['RACK_ENV'] == 'test' 24 | DataMagic.init(load_now: true) 25 | else 26 | DataMagic.init(load_now: false) # don't index data 27 | end 28 | 29 | end 30 | 31 | end 32 | -------------------------------------------------------------------------------- /app/index_app.rb: -------------------------------------------------------------------------------- 1 | require 'csv' 2 | 3 | module OpenDataMaker 4 | 5 | class IndexApp < Padrino::Application 6 | register SassInitializer 7 | register Padrino::Helpers 8 | 9 | enable :sessions 10 | 11 | get '/' do 12 | DataMagic.config.scoped_index_name 13 | end 14 | 15 | get '/init' do 16 | DataMagic.init(load_now: true) 17 | "ok" 18 | end 19 | 20 | get '/reindex' do 21 | DataMagic.reindex 22 | "reindexing..." 23 | end 24 | end 25 | 26 | end 27 | -------------------------------------------------------------------------------- /app/stylesheets/application.sass: -------------------------------------------------------------------------------- 1 | body 2 | -webkit-font-smoothing: antialiased 3 | font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif 4 | font-size: 1em 5 | line-height: 1.5 6 | color: #333 7 | 8 | h1, h2, h3, h4, h5, h6 9 | font-family: "Raleway", "Helvetica Neue", Helvetica, Arial, sans-serif 10 | line-height: 1.1em 11 | margin: 0 12 | text-rendering: optimizeLegibility 13 | 14 | p 15 | margin: 0 0 0.75em 16 | 17 | hr 18 | border-bottom: 1px solid silver 19 | border-left: none 20 | border-right: none 21 | border-top: none 22 | margin: 1em 0 23 | 24 | img 25 | -webkit-user-select: none 26 | cursor: zoom-in 27 | margin: 0 28 | max-width: 50% 29 | 30 | .logo 31 | height: 150px 32 | width: 150px 33 | top: 50px 34 | left: 50px 35 | z-index: 20 36 | 37 | @media screen and (max-width: 995px) 38 | .logo 39 | height: 100px 40 | width: 100px 41 | top: 40px 42 | left: 20px 43 | 44 | @media screen and (max-width: 785px) 45 | .logo 46 | height: 75px 47 | width: 75px 48 | 49 | @media screen and (max-width: 590px) 50 | .logo 51 | top: 73px 52 | 53 | @media screen and (max-width: 480px) 54 | .logo 55 | top: 16px 56 | left: 0px 57 | 58 | .bottom-margin 59 | margin-bottom: 0.5em 60 | color: #c00 61 | 62 | .title 63 | text-align: center 64 | font-family: "Raleway", "Helvetica Neue", Helvetica, Arial, sans-serif 65 | font-size: 2em 66 | line-height: 2em 67 | 68 | .header 69 | background-color: #9cf 70 | 71 | .categories .category 72 | margin: 5px 73 | padding: 15px 74 | border: solid 1px silver 75 | word-wrap: break-word 76 | display: inline-block 77 | width: 92% 78 | background-color: #ffc 79 | a 80 | color: black 81 | text-decoration: none 82 | &:visited 83 | color: black 84 | 85 | .categories__column 86 | display: inline-block 87 | width: 100% 88 | vertical-align: top 89 | -webkit-column-count: 2 90 | -moz-column-count: 2 91 | column-count: 2 92 | column-gap: .2em 93 | -webkit-column-gap: .2em 94 | -moz-column-gap: .2em 95 | 96 | .category__name 97 | font-size: 18px 98 | font-weight: bold 99 | margin-bottom: 5px 100 | color: #c00 101 | 102 | .category__fields 103 | list-style: none 104 | padding: 0 105 | 106 | .category__field-name 107 | font-size: 15px 108 | font-weight: bold 109 | margin-bottom: 2px 110 | color: #c00 111 | width: 80% 112 | 113 | .category__field-type 114 | 
font-size: 15px 115 | font-weight: bold 116 | color: #c00 117 | width: 10% 118 | float: right 119 | -------------------------------------------------------------------------------- /app/views/category.liquid: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 |
5 |

{{ title }}

6 |
7 |
8 |
9 |
10 | 11 |

Data Details for the Category

12 | 13 |
14 |
15 |
16 |
17 | 18 |
19 | Back to the list of Categories 20 | 21 | 55 | -------------------------------------------------------------------------------- /app/views/home.liquid: -------------------------------------------------------------------------------- 1 |
2 |
3 | 15 |
16 |

{{ title }}

17 |
18 |
19 |
20 |
21 | 22 |

API endpoints

23 | 24 | 29 | 30 | {% if examples.size > 0 %} 31 |

Examples

32 | 33 | 38 | {% endif %} 39 | 40 |

Browse Data Details by Category

41 |
42 |
43 |
44 | 45 | 64 | -------------------------------------------------------------------------------- /app/views/layouts/application.erb: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | <%== yield %> 10 | 11 | 12 | -------------------------------------------------------------------------------- /bin/open-data-maker: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | Dir.chdir(File.dirname(__FILE__)+'/..') 4 | 5 | # Start the app with Padrino::Server 6 | require 'rubygems' 7 | require 'bundler/setup' 8 | require 'padrino-core/cli/launcher' 9 | 10 | ARGV.unshift('start') if ARGV.first.nil? || ARGV.first.start_with?('-') 11 | Padrino::Cli::Launcher.start ARGV 12 | 13 | # Start the app with Rack::Server 14 | #require "rack" 15 | #Rack::Server.start 16 | -------------------------------------------------------------------------------- /config.ru: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rackup 2 | # encoding: utf-8 3 | 4 | # This file can be used to start Padrino, 5 | # just execute it from the command line. 6 | 7 | require File.expand_path("../config/boot.rb", __FILE__) 8 | 9 | run Padrino.application 10 | -------------------------------------------------------------------------------- /config/apps.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file mounts each app in the Padrino project to a specified sub-uri. 3 | # You can mount additional applications using any of these commands below: 4 | # 5 | # Padrino.mount('blog').to('/blog') 6 | # Padrino.mount('blog', :app_class => 'BlogApp').to('/blog') 7 | # Padrino.mount('blog', :app_file => 'path/to/blog/app.rb').to('/blog') 8 | # 9 | # You can also map apps to a specified host: 10 | # 11 | # Padrino.mount('Admin').host('admin.example.org') 12 | # Padrino.mount('WebSite').host(/.*\.?example.org/) 13 | # Padrino.mount('Foo').to('/foo').host('bar.example.org') 14 | # 15 | # Note 1: Mounted apps (by default) should be placed into the project root at '/app_name'. 16 | # Note 2: If you use the host matching remember to respect the order of the rules. 17 | # 18 | # By default, this file mounts the primary app which was generated with this project. 19 | # However, the mounted app can be modified as needed: 20 | # 21 | # Padrino.mount('AppName', :app_file => 'path/to/file', :app_class => 'BlogApp').to('/') 22 | # 23 | 24 | ## 25 | # Setup global project settings for your apps. These settings are inherited by every subapp. You can 26 | # override these settings in the subapps as needed. 
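# (For example, app/index_app.rb re-enables sessions inside its own class
# body with `enable :sessions`, overriding the stateless default used by
# the main app.)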
27 | # 28 | Padrino.configure_apps do 29 | # enable :sessions 30 | set :session_secret, 'ffb8bfc2d71e2ad938950169de2757ab7b73b1cd5fbf91b4b912ae493dc5b70f' 31 | set :protection, :except => :path_traversal 32 | set :protect_from_csrf, true 33 | 34 | set :allow_origin, :any 35 | 36 | end 37 | 38 | # If needed, mount the app that does indexing 39 | if ENV['INDEX_APP'] == "enable" 40 | puts "mounting index app" 41 | Padrino.mount('OpenDataMaker::IndexApp', :app_file => Padrino.root('app/index_app.rb')).to('/index') 42 | end 43 | 44 | # Mounts the core application for this project 45 | Padrino.mount('OpenDataMaker::App', :app_file => Padrino.root('app/app.rb')).to('/') 46 | -------------------------------------------------------------------------------- /config/boot.rb: -------------------------------------------------------------------------------- 1 | require_relative 'env.rb' 2 | 3 | ## 4 | # ## Enable devel logging 5 | # 6 | # Padrino::Logger::Config[:development][:log_level] = :devel 7 | # Padrino::Logger::Config[:development][:log_static] = true 8 | # 9 | # ## Configure your I18n 10 | # 11 | # I18n.default_locale = :en 12 | # I18n.enforce_available_locales = false 13 | # 14 | # ## Configure your HTML5 data helpers 15 | # 16 | # Padrino::Helpers::TagHelpers::DATA_ATTRIBUTES.push(:dialog) 17 | # text_field :foo, :dialog => true 18 | # Generates: 19 | # 20 | # ## Add helpers to mailer 21 | # 22 | # Mail::Message.class_eval do 23 | # include Padrino::Helpers::NumberHelpers 24 | # include Padrino::Helpers::TranslationHelpers 25 | # end 26 | 27 | ## 28 | # Add your before (RE)load hooks here 29 | # 30 | Padrino.before_load do 31 | end 32 | 33 | ## 34 | # Add your after (RE)load hooks here 35 | # 36 | Padrino.after_load do 37 | end 38 | 39 | Padrino.load! 40 | -------------------------------------------------------------------------------- /config/env.rb: -------------------------------------------------------------------------------- 1 | # define core environment that we need in tests and for the app 2 | 3 | # Defines our constants 4 | ENV['RACK_ENV'] ||= 'development' 5 | RACK_ENV = ENV['RACK_ENV'] unless defined?(RACK_ENV) 6 | PADRINO_ROOT = File.expand_path('../..', __FILE__) unless defined?(PADRINO_ROOT) 7 | 8 | # Load our dependencies 9 | require 'rubygems' unless defined?(Gem) 10 | require 'bundler/setup' 11 | require 'newrelic_rpm' 12 | Bundler.require(:default, RACK_ENV) 13 | 14 | # do this early so we can log during startup 15 | require './lib/data_magic/config.rb' 16 | DataMagic::Config.logger=Logger.new(STDOUT) if ENV['VCAP_APPLICATION'] # Cloud Foundry 17 | -------------------------------------------------------------------------------- /config/puma.rb: -------------------------------------------------------------------------------- 1 | workers Integer(ENV['WEB_CONCURRENCY'] || 2) 2 | threads_count = Integer(ENV['MAX_THREADS'] || 5) 3 | threads threads_count, threads_count 4 | worker_timeout 30 5 | 6 | preload_app! 
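# With preload_app!, the app is loaded once in the master process and then
# forked into each worker. If per-worker state (e.g. a connection) ever needs
# re-establishing, Puma's on_worker_boot hook is the usual place -- a sketch,
# not currently needed by this app:
#
#   on_worker_boot do
#     # re-open per-worker resources here
#   end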
7 | 8 | rackup DefaultRackup 9 | port ENV['PORT'] || 3000 10 | environment ENV['RACK_ENV'] || 'development' 11 | -------------------------------------------------------------------------------- /config/unicorn.rb: -------------------------------------------------------------------------------- 1 | worker_processes 5 2 | timeout 30 3 | preload_app true 4 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | odm-elasticsearch: 4 | image: elasticsearch:2.4.4 5 | container_name: odm-elasticsearch 6 | environment: 7 | ES_JAVA_OPTS: "-Dmapper.allow_dots_in_name=true -Xms512m -Xmx512m" 8 | discovery.type: single-node 9 | ports: 10 | - "9200:9200" 11 | volumes: 12 | - ./.docker/data/es:/usr/share/elasticsearch/data 13 | web: 14 | build: . 15 | command: bash -c 16 | "./wait-for-it.sh odm-elasticsearch:9200 && bundle exec padrino start -h 0.0.0.0" 17 | volumes: 18 | - .:/myapp 19 | ports: 20 | - "3000:3000" 21 | environment: 22 | - ES_URI=odm-elasticsearch 23 | - RACK_ENV=test # Set env to ensure ES configs with replicas 0 24 | depends_on: 25 | - odm-elasticsearch -------------------------------------------------------------------------------- /lib/data_magic/category.rb: -------------------------------------------------------------------------------- 1 | Category = Struct.new(:category_id) do 2 | def assemble 3 | category_entry = DataMagic.config.data['categories'][category_id] 4 | dictionary = DataMagic.config.dictionary 5 | field_details = {} 6 | category_entry['fields'].each do |field_name| 7 | field_details[field_name] = dictionary[field_name] || { "description"=>"" } 8 | end 9 | field_details = { "field_details" => field_details } 10 | assemble = category_entry.merge(field_details) 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /lib/data_magic/error_checker.rb: -------------------------------------------------------------------------------- 1 | require 'action_view' 2 | include ActionView::Helpers::SanitizeHelper 3 | module DataMagic 4 | module ErrorChecker 5 | class << self 6 | def check(params, options, config) 7 | report_required_params_absent(options) + 8 | report_nonexistent_params(params, config) + 9 | report_nonexistent_operators(params) + 10 | report_nonexistent_fields(options[:fields], config) + 11 | report_bad_range_argument(params) + 12 | report_wrong_field_type(params, config) + 13 | report_wrong_zip(options) + 14 | report_distance_requires_zip(options) 15 | end 16 | 17 | private 18 | 19 | def report_required_params_absent(options) 20 | if options[:command] == 'stats' && options[:fields].length == 0 21 | [build_error(error: 'invalid_or_incomplete_parameters', input: options[:command])] 22 | else 23 | [] 24 | end 25 | end 26 | 27 | def report_distance_requires_zip(params) 28 | # if distance, must have zip 29 | return [] if (params[:distance] && params[:zip]) || (!params[:distance]) 30 | [build_error( 31 | error: 'distance_error' 32 | )] 33 | end 34 | 35 | def report_wrong_zip(params) 36 | return [] if !params[:zip] || Zipcode.valid?(params[:zip]) 37 | [build_error( 38 | error: 'zipcode_error', 39 | parameter: :zip, 40 | input: params[:zip].to_s 41 | )] 42 | end 43 | 44 | def report_nonexistent_params(params, config) 45 | return [] unless config.dictionary_only_search? 46 | params.keys.reject { |p| config.field_type(strip_op(p)) }. 
47 | map { |p| build_error(error: 'parameter_not_found', input: strip_op(p)) } 48 | end 49 | 50 | def report_nonexistent_operators(params) 51 | params.keys.select { |p| p =~ /__(\w+)$/ && $1 !~ /range|not|ne/i }. 52 | map do |p| 53 | (param, op) = p.match(/^(.*)__(\w+)$/).captures 54 | build_error(error: 'operator_not_found', parameter: param, input: op) 55 | end 56 | end 57 | 58 | def report_nonexistent_fields(fields, config) 59 | if fields && !fields.empty? && config.dictionary_only_search? 60 | fields.reject { |f| config.field_type(f.to_s) }. 61 | map { |f| build_error(error: 'field_not_found', input: f.to_s) } 62 | else 63 | [] 64 | end 65 | end 66 | 67 | def report_bad_range_argument(params) 68 | ranges = params.select do |p,v| 69 | p =~ /__range$/ and 70 | v !~ / ^(-?\d+(\.\d+)?)? # optional starting number 71 | \.\. # range dots 72 | (-?\d+(\.\d+)?)? # optional ending number 73 | (,(\d+(\.\d+)?)?\.\.(\d+(\.\d+)?)?)* # and more, with commas 74 | $/x 75 | end 76 | ranges.map do |p,v| 77 | build_error(error: 'range_format_error', parameter: strip_op(p), input: v) 78 | end 79 | end 80 | 81 | def report_wrong_field_type(params, config) 82 | bad_fields = params.select do |p, v| 83 | next false if p =~ /__range$/ 84 | param_type = config.field_type(strip_op(p)) 85 | value_type = guess_value_type(v) 86 | (param_type == "float" && value_type != "float" && value_type != "integer") or 87 | (param_type == "integer" && value_type != "integer") 88 | end 89 | bad_fields.map do |p, v| 90 | build_error(error: 'parameter_type_error', parameter: p, input: v, 91 | expected_type: config.field_type(strip_op(p)), 92 | input_type: guess_value_type(v)) 93 | end 94 | end 95 | 96 | def build_error(opts) 97 | opts[:input] = sanitize_error_opts(opts[:input]) unless opts[:input].nil? 98 | opts[:parameter] = sanitize_error_opts(opts[:parameter]) unless opts[:parameter].nil? 99 | opts[:message] = 100 | case opts[:error] 101 | when 'invalid_or_incomplete_parameters' 102 | "The command #{opts[:input]} requires a fields parameter." 103 | when 'parameter_not_found' 104 | "The input parameter '#{opts[:input]}' is not known in this dataset." 105 | when 'field_not_found' 106 | "The input field '#{opts[:input]}' (in the fields parameter) is not a field in this dataset." 107 | when 'operator_not_found' 108 | "The input operator '#{opts[:input]}' (appended to the parameter '#{opts[:parameter]}') is not known or supported. (Known operators: range, ne, not)" 109 | when 'parameter_type_error' 110 | "The parameter '#{opts[:parameter]}' expects a value of type #{opts[:expected_type]}, but received '#{opts[:input]}' which is a value of type #{opts[:input_type]}." 111 | when 'range_format_error' 112 | "The range '#{opts[:input]}' supplied to parameter '#{opts[:parameter]}' isn't in the correct format." 113 | when 'zipcode_error' 114 | "The provided zipcode, '#{opts[:input]}', is not valid." 115 | when 'distance_error' 116 | "Use of the 'distance' parameter also requires a 'zip' parameter." 
117 | end 118 | opts 119 | end 120 | 121 | def guess_value_type(value) 122 | case value.to_s 123 | when /^-?\d+$/ 124 | "integer" 125 | when /^(-?\d+,?)+$/ # list of integers 126 | "integer" 127 | when /^-?\d+\.\d+$/ 128 | "float" 129 | else 130 | "string" 131 | end 132 | end 133 | 134 | def strip_op(param) 135 | param.sub(/__\w+$/, '') 136 | end 137 | 138 | def sanitize_error_opts(html) 139 | strip_tags(html.to_s) 140 | end 141 | 142 | end 143 | end 144 | end 145 | -------------------------------------------------------------------------------- /lib/data_magic/example.rb: -------------------------------------------------------------------------------- 1 | class Example < Hashie::Mash 2 | include Hashie::Extensions::Coercion 3 | include Hashie::Extensions::MergeInitializer 4 | coerce_key :name, String 5 | coerce_key :description, String 6 | coerce_key :params, String 7 | coerce_key :endpoint, String 8 | coerce_key :link, String 9 | def initialize(hash = {}) 10 | super 11 | # we want to use this in a liquid template 12 | # so all attributes needs to be plain data, not code 13 | self[:link] = "/v1/#{endpoint}?#{params}" if self[:link].nil? 14 | end 15 | 16 | end 17 | -------------------------------------------------------------------------------- /lib/data_magic/index/builder_data.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class BuilderData 4 | attr_reader :data, :options 5 | 6 | def initialize(data, options) 7 | @options = options 8 | @data = data 9 | end 10 | 11 | def additional_fields 12 | options[:mapping] || {} 13 | end 14 | 15 | def new_field_names 16 | field_names = options[:fields] || {} 17 | field_names.merge(additional_fields) 18 | end 19 | 20 | def additional_data 21 | options[:add_data] 22 | end 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /lib/data_magic/index/document.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class Document 4 | attr_reader :data, :id 5 | 6 | def initialize(data) 7 | @data = data 8 | @id = calculate_id 9 | end 10 | 11 | def remove_ids 12 | config.data['unique'].each { |key| data.delete key } 13 | end 14 | 15 | def headers 16 | data.keys.map(&:to_s) # does this only return top level fields? 17 | end 18 | 19 | def preview(n=500) 20 | data.inspect[0..n] 21 | end 22 | 23 | def id_empty? 24 | id && id.empty? 
25 | end 26 | 27 | private 28 | 29 | def calculate_id 30 | return nil if config.data['unique'].length == 0 31 | config.data['unique'].map { |field| data[field] }.join(':') 32 | end 33 | 34 | def config 35 | DataMagic.config 36 | end 37 | end 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /lib/data_magic/index/event_logger.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class EventLogger 4 | def trigger(event, *args) 5 | self.send(event, *args) 6 | end 7 | 8 | ['debug', 'info', 'warn', 'error'].each do |level| 9 | class_eval <<-RUBY, __FILE__, __LINE__ + 1 10 | def #{level}(message, object=nil, limit=nil) 11 | logger.#{level}(full_message(message, object, limit)) 12 | end 13 | RUBY 14 | end 15 | 16 | def full_message(prefix, object, limit) 17 | return prefix unless object 18 | message = "#{prefix}: " 19 | if limit 20 | message << object.inspect[0..limit] 21 | else 22 | message << object.inspect 23 | end 24 | message 25 | end 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /lib/data_magic/index/importer.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | module DataMagic 4 | module Index 5 | class Importer 6 | attr_reader :raw_data, :options, :row_map 7 | 8 | def initialize(raw_data, options, row_map) 9 | @raw_data = raw_data 10 | @options = options 11 | @row_map = row_map 12 | end 13 | 14 | def process 15 | setup 16 | parse_and_log 17 | finish! 18 | [row_count, headers] 19 | end 20 | 21 | def client 22 | @client ||= SuperClient.new(es_client, options) 23 | end 24 | 25 | def builder_data 26 | @builder_data ||= BuilderData.new(raw_data, options) 27 | end 28 | 29 | def row_map 30 | @row_map || {} 31 | end 32 | 33 | def output 34 | @output ||= Output.new 35 | end 36 | 37 | def parse_and_log 38 | parse_csv 39 | rescue InvalidData => e 40 | trigger("error", e.message) 41 | raise InvalidData, "invalid file format" if empty? 42 | end 43 | 44 | def chunk_size 45 | (ENV['CHUNK_SIZE'] || 100).to_i 46 | end 47 | 48 | def nprocs 49 | (ENV['NPROCS'] || 1).to_i 50 | end 51 | 52 | def parse_csv 53 | if nprocs == 1 54 | parse_csv_whole 55 | elsif client.nested_partial? 56 | parse_csv_mapped 57 | else 58 | parse_csv_chunked 59 | end 60 | data.close 61 | end 62 | 63 | def parse_csv_whole 64 | CSV.new( 65 | data, 66 | headers: true, 67 | header_converters: lambda { |str| str.strip.to_sym } 68 | ).each do |row| 69 | dispatch_row_importer(row) 70 | break if at_limit? 71 | end 72 | end 73 | 74 | def parse_csv_chunked 75 | CSV.new( 76 | data, 77 | headers: true, 78 | header_converters: lambda { |str| str.strip.to_sym } 79 | ).each.each_slice(chunk_size) do |chunk| 80 | break if at_limit? 
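# Each CSV chunk is split into roughly equal slices, one per worker process
# (NPROCS); Parallel imports the slices concurrently, so rows within a chunk
# are processed independently and in no guaranteed order.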
81 | chunks_per_proc = (chunk.size / nprocs.to_f).ceil 82 | Parallel.each(chunk.each_slice(chunks_per_proc)) do |rows| 83 | rows.each_with_index do |row, idx| 84 | dispatch_row_importer(row) 85 | end 86 | end 87 | if !headers 88 | single_document = DocumentBuilder.create(chunk.first, builder_data, DataMagic.config) 89 | set_headers(single_document) 90 | end 91 | increment(chunk.size) 92 | end 93 | end 94 | 95 | def parse_csv_mapped 96 | rocky_chunks = CSV.new( 97 | data, 98 | headers: true, 99 | header_converters: lambda { |str| str.strip.to_sym } 100 | ).chunk_while { |a, b| 101 | # chunk by nested document link 102 | lookup_row_id(a) === lookup_row_id(b) 103 | }.to_a 104 | 105 | # rearrange chunks for parallel processing, so our slices are 'roughly' the same size 106 | sorted = rocky_chunks.sort_by(&:size) 107 | grouped = sorted.each.each_with_index.group_by { |_, index| index % nprocs } 108 | smooth_chunks = grouped.map { |_, data| 109 | # keep only the chunk arrays; each_with_index added an index item we don't want 110 | data.map(&:first) 111 | }.flatten(1) 112 | 113 | chunks_per_proc = (smooth_chunks.size / nprocs.to_f).ceil 114 | 115 | Parallel.each(smooth_chunks.each_slice(chunks_per_proc)) do |chunks| 116 | chunks.each do |chunk| 117 | dispatch_row_importer(chunk) 118 | end 119 | end 120 | increment(smooth_chunks.size) 121 | end 122 | 123 | def dispatch_row_importer(row) 124 | if client.nested_partial? 125 | if row.is_a?(Array) 126 | dispatch_row_bulk_importer(row) 127 | else 128 | row_id = lookup_row_id(row) 129 | Array(row_map.map[row_id]).each do |related_id| 130 | row << [row_map.id, related_id] 131 | RowImporter.process(row, self) 132 | end 133 | end 134 | else 135 | RowImporter.process(row, self) 136 | end 137 | end 138 | 139 | def dispatch_row_bulk_importer(rows) 140 | row_id = lookup_row_id(rows[0]) 141 | Array(row_map.map[row_id]).each do |related_id| 142 | rows.each do |row| 143 | row << [row_map.id, related_id] 144 | end 145 | RowBulkImporter.process(rows, self) 146 | end 147 | end 148 | 149 | def lookup_row_id(row) 150 | link = row_map.calculate_column(options[:partial_map]['link']) 151 | row.to_hash[link] 152 | end 153 | 154 | def setup 155 | client.create_index 156 | log_setup 157 | end 158 | 159 | def finish! 160 | validate! 161 | refresh_index if ENV['RACK_ENV'] == 'test' 162 | log_finish 163 | end 164 | 165 | def log_setup 166 | opts = options.reject { |k,v| k == :mapping } 167 | trigger("info", "options", opts) 168 | trigger("info", "new_field_names", new_field_names) 169 | trigger("info", "additional_data", additional_data) 170 | end 171 | 172 | def log_finish 173 | trigger("info", "skipped (missing parent id)", output.skipped) if !output.skipped.empty? 174 | trigger('info', "done #{row_count} rows") 175 | end 176 | 177 | def event_logger 178 | @event_logger ||= EventLogger.new 179 | end 180 | 181 | def at_limit?
182 | options[:limit_rows] && row_count == options[:limit_rows] 183 | end 184 | 185 | extend Forwardable 186 | 187 | def_delegators :output, :set_headers, :skipping, :skipped, :increment, :row_count, :log_limit, 188 | :empty?, :validate!, :headers 189 | def_delegators :builder_data, :data, :new_field_names, :additional_data 190 | def_delegators :client, :refresh_index 191 | def_delegators :event_logger, :trigger 192 | 193 | def self.process(*args) 194 | new(*args).process 195 | end 196 | 197 | private 198 | 199 | def es_client 200 | DataMagic.client 201 | end 202 | end 203 | end 204 | end 205 | -------------------------------------------------------------------------------- /lib/data_magic/index/output.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class Output 4 | attr_reader :row_count, :headers, :skipped 5 | 6 | def initialize 7 | @row_count = 0 8 | @skipped = [] 9 | end 10 | 11 | def set_headers(doc) 12 | return if headers 13 | if doc.is_a?(Array) 14 | @headers = doc[0].headers 15 | else 16 | @headers = doc.headers 17 | end 18 | end 19 | 20 | def skipping(id) 21 | skipped << id 22 | end 23 | 24 | def increment(count = 1) 25 | @row_count += count 26 | end 27 | 28 | def validate! 29 | raise DataMagic::InvalidData, "zero rows" if empty? 30 | end 31 | 32 | def empty? 33 | row_count == 0 34 | end 35 | 36 | def log(doc) 37 | log_0(doc) if empty? 38 | log_marker if row_count % 500 == 0 39 | end 40 | 41 | def log_skips 42 | return if skipped.empty? 43 | logger.info "skipped (missing parent id): #{skipped.join(',')}" 44 | end 45 | 46 | def log_limit 47 | logger.info "done now, limiting rows to #{row_count}" 48 | end 49 | 50 | private 51 | 52 | def log_0(document) 53 | logger.debug "csv parsed" 54 | logger.info "row#{row_count} -> #{document.preview}" 55 | end 56 | 57 | def log_marker 58 | logger.info "indexing rows: #{row_count}..." 59 | end 60 | end 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /lib/data_magic/index/repository.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class Repository 4 | attr_reader :client, :document 5 | 6 | def initialize(client, document) 7 | @client = client 8 | @document = document 9 | end 10 | 11 | def skipped? 12 | @skipped 13 | end 14 | 15 | def save 16 | @skipped = false 17 | if client.creating? 18 | create 19 | else 20 | update 21 | end 22 | end 23 | 24 | private 25 | 26 | def update 27 | if client.allow_skips? 28 | update_with_rescue 29 | else 30 | update_without_rescue 31 | end 32 | end 33 | 34 | def create 35 | client.index({ 36 | index: client.index_name, 37 | id: document.id, 38 | type: 'document', 39 | body: document.data, 40 | timeout: '5m' 41 | }) 42 | end 43 | 44 | def update_without_rescue 45 | if client.nested_partial? 
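# nested partial rows can't be saved with a plain doc merge; they go through the scripted append in update_nested_partial below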
46 | update_nested_partial 47 | else 48 | client.update({ 49 | index: client.index_name, 50 | id: document.id, 51 | type: 'document', 52 | body: {doc: document.data}, 53 | timeout: '5m' 54 | }) 55 | end 56 | end 57 | 58 | def update_with_rescue 59 | update_without_rescue 60 | rescue Elasticsearch::Transport::Transport::Errors::NotFound 61 | @skipped = true 62 | end 63 | 64 | def update_nested_partial 65 | if document.is_a?(Array) 66 | update_bulk_nested_partial 67 | else 68 | doc = { 69 | index: client.index_name, 70 | id: document.id, 71 | type: 'document', 72 | body: {doc: document.data}, 73 | timeout: '5m' 74 | } 75 | root_key = client.options[:nest]['key'] 76 | partial_path = client.options[:partial_map]['path'] 77 | 78 | # extract some keys of the dotted path 79 | path_keys = partial_path.split('.') 80 | first = path_keys.first 81 | path_keys = path_keys.unshift(root_key) 82 | 83 | # extract the current row's nested data, in case we're appending to an existing array 84 | nested_item = document.data.dig(*path_keys)[0] 85 | 86 | # this script will either create the new nested array if it doesn't exist, or append the nested item 87 | script = "if (ctx._source['#{root_key}'].#{first} == null) { ctx._source['#{root_key}'].#{first} = data['#{root_key}'].#{first}; } else { ctx._source['#{root_key}'].#{partial_path} += inner; }" 88 | doc[:body] = { script: script, params: { inner: nested_item, data: document.data } } 89 | doc[:retry_on_conflict] = 5 90 | client.update(doc) 91 | end 92 | end 93 | 94 | def update_bulk_nested_partial 95 | root_key = client.options[:nest]['key'] 96 | partial_path = client.options[:partial_map]['path'] 97 | 98 | # extract some keys of the dotted path 99 | path_keys = partial_path.split('.') 100 | first = path_keys.first 101 | path_keys = path_keys.unshift(root_key) 102 | 103 | nested_items = document.map do |doc| 104 | doc.data.dig(*path_keys)[0] 105 | end 106 | 107 | hash = NestedHash.new 108 | hash.dotkey_set(path_keys.join('.'), nested_items) 109 | 110 | doc = { 111 | index: client.index_name, 112 | id: document[0].id, 113 | type: 'document', 114 | timeout: '5m' 115 | } 116 | # this script will either create the full object path and new nested array if it doesn't exist already, or create the new nested items array 117 | script = "if (ctx._source['#{root_key}'] == null) { ctx._source['#{root_key}'] = data['#{root_key}']; } else { if (ctx._source['#{root_key}'].#{first} == null) { ctx._source['#{root_key}'].#{first} = data['#{root_key}'].#{first}; } else { ctx._source['#{root_key}'].#{partial_path} = inner; } }" 118 | doc[:body] = { script: script, params: { inner: nested_items, data: hash } } 119 | doc[:retry_on_conflict] = 5 120 | client.update(doc) 121 | end 122 | end 123 | end 124 | end 125 | -------------------------------------------------------------------------------- /lib/data_magic/index/row_bulk_importer.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | module DataMagic 4 | module Index 5 | class RowBulkImporter 6 | attr_reader :rows, :importer 7 | 8 | def initialize(rows, importer) 9 | @rows = rows 10 | @importer = importer 11 | end 12 | 13 | def process 14 | log_row_start 15 | before_save 16 | save 17 | after_save 18 | log_row_end 19 | end 20 | 21 | def documents 22 | @documents ||= DocumentBuilder.create(rows, importer.builder_data, config) 23 | end 24 | 25 | def repository 26 | @repository ||= Repository.new(importer.client, documents) 27 | end 28 | 29 | private 30 | 31 | def 
log_row_start 32 | trigger("debug", "csv parsed") if importer.empty? 33 | trigger("info", "row #{importer.row_count}", documents, 500) if importer.row_count % 500 == 0 34 | #trigger("info", "id", document.id) 35 | if documents[0].id_empty? 36 | trigger("warn", "blank id") 37 | trigger("warn", "unique", config.data["unique"]) 38 | trigger("warn", "in row", documents, 255) 39 | end 40 | end 41 | 42 | def before_save 43 | importer.set_headers(documents) 44 | end 45 | 46 | def save 47 | repository.save 48 | end 49 | 50 | def after_save 51 | importer.skipping(documents[0].id) if repository.skipped? 52 | importer.increment 53 | end 54 | 55 | def log_row_end 56 | return if !importer.at_limit? 57 | trigger("info", "done now, limiting rows to #{importer.row_count}") 58 | end 59 | 60 | def config 61 | DataMagic.config 62 | end 63 | 64 | extend Forwardable 65 | 66 | def_delegators :importer, :trigger 67 | 68 | def self.process(*args) 69 | new(*args).process 70 | end 71 | end 72 | end 73 | end 74 | -------------------------------------------------------------------------------- /lib/data_magic/index/row_importer.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | module DataMagic 4 | module Index 5 | class RowImporter 6 | attr_reader :row, :importer 7 | 8 | def initialize(row, importer) 9 | @row = row 10 | @importer = importer 11 | end 12 | 13 | def process 14 | log_row_start 15 | before_save 16 | save 17 | after_save 18 | log_row_end 19 | end 20 | 21 | def document 22 | @document ||= DocumentBuilder.create(row, importer.builder_data, config) 23 | end 24 | 25 | def repository 26 | @repository ||= Repository.new(importer.client, document) 27 | end 28 | 29 | private 30 | 31 | def log_row_start 32 | trigger("debug", "csv parsed") if importer.empty? 33 | trigger("info", "row #{importer.row_count}", document, 500) if importer.row_count % 500 == 0 34 | #trigger("info", "id", document.id) 35 | if document.id_empty? 36 | trigger("warn", "blank id") 37 | trigger("warn", "unique", config.data["unique"]) 38 | trigger("warn", "in row", document, 255) 39 | end 40 | end 41 | 42 | def before_save 43 | importer.set_headers(document) 44 | end 45 | 46 | def save 47 | repository.save 48 | end 49 | 50 | def after_save 51 | importer.skipping(document.id) if repository.skipped? 52 | importer.increment 53 | end 54 | 55 | def log_row_end 56 | return if !importer.at_limit? 57 | trigger("info", "done now, limiting rows to #{importer.row_count}") 58 | end 59 | 60 | def config 61 | DataMagic.config 62 | end 63 | 64 | extend Forwardable 65 | 66 | def_delegators :importer, :trigger 67 | 68 | def self.process(*args) 69 | new(*args).process 70 | end 71 | end 72 | end 73 | end 74 | -------------------------------------------------------------------------------- /lib/data_magic/index/row_map.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class RowMap 4 | attr_reader :map, :id, :related 5 | 6 | def initialize(primary_key, join_key) 7 | @id = calculate_column(primary_key) 8 | @related = calculate_column(join_key) 9 | @map = {} 10 | end 11 | 12 | def add_item(row) 13 | # only add unique ids to the related key array 14 | @map[row[@related]] = (@map[row[@related]] ||= []) | [row[@id]] 15 | end 16 | 17 | def map 18 | @map 19 | end 20 | 21 | def calculate_column(value) 22 | column_name = DataMagic::config.field_mapping.invert[value] 23 | column_name.to_sym unless column_name.nil? 
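# e.g. with a dictionary entry `id: UNITID` (as in the fixtures), field_mapping inverts to { 'id' => 'UNITID' }, so calculate_column('id') returns :UNITID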
24 | end 25 | 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /lib/data_magic/index/super_client.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | module DataMagic 4 | module Index 5 | class SuperClient 6 | attr_reader :client, :options 7 | 8 | def initialize(client, options) 9 | @client = client 10 | @options = options 11 | end 12 | 13 | def create_index 14 | DataMagic.create_index unless config.index_exists? 15 | end 16 | 17 | def refresh_index 18 | client.indices.refresh index: index_name 19 | end 20 | 21 | def creating? 22 | options[:root] || options[:nest] == nil 23 | end 24 | 25 | def nested_partial? 26 | options[:map] 27 | end 28 | 29 | def allow_skips? 30 | options[:nest][:parent_missing] == 'skip' 31 | end 32 | 33 | def index_name 34 | config.scoped_index_name 35 | end 36 | 37 | def config 38 | DataMagic.config 39 | end 40 | 41 | extend Forwardable 42 | 43 | def_delegators :client, :index, :update 44 | end 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /lib/expression/eval.rb: -------------------------------------------------------------------------------- 1 | 2 | class Expression 3 | class Eval < Parslet::Transform 4 | rule(:var => simple(:var)) { 5 | variables[String(var)] 6 | } 7 | 8 | # in Ruby 0 is 'truthy' but that's not what most people expect 9 | rule(:or => { :left => subtree(:left), :right => subtree(:right) }) do 10 | left == 0 ? right : (left or right) 11 | end 12 | 13 | rule(:and => { :left => subtree(:left), :right => subtree(:right) }) do 14 | left == 0 ? left : (left and right) 15 | end 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /lib/expression/expression.rb: -------------------------------------------------------------------------------- 1 | require_relative 'parser' 2 | require_relative 'eval' 3 | require_relative 'variables' 4 | require 'hashie' 5 | 6 | class Expression 7 | attr_accessor :name # purely for reporting Errors 8 | attr_reader :variables 9 | 10 | def initialize(expr, name = 'unknown') 11 | @name = name 12 | @tree = Parser.new.parse(expr) 13 | @variables = Variables.new.apply(@tree) 14 | end 15 | 16 | def evaluate(vars) 17 | Hashie.stringify_keys! vars 18 | Eval.new.apply(@tree, variables: vars) 19 | end 20 | 21 | def self.find_or_create(expr, name = 'unknown') 22 | @cached_expression ||= {} 23 | @cached_expression[expr] ||= Expression.new(expr, name) 24 | @cached_expression[expr] 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /lib/expression/parser.rb: -------------------------------------------------------------------------------- 1 | require 'parslet' 2 | # based on https://github.com/kschiess/parslet/blob/master/example/boolean_algebra.rb 3 | # usage: 4 | # def parse(str) 5 | # Expression::Parser.new.parse(str) 6 | # 7 | # rescue Parslet::ParseFailed => failure 8 | # puts failure.cause.ascii_tree 9 | # end 10 | # 11 | # tree = Expression::Parser.new.parse("one or two") 12 | # => {:or=>{:left=>{:var=>"one"@0}, :right=>{:var=>"two"@7}}} 13 | # Eval.new.apply(tree, variables: {"one"=>1, "two"=>2}) 14 | # 15 | # Variables.new.apply(tree) 16 | 17 | class Expression 18 | class Parser < Parslet::Parser 19 | rule(:space) { match[" "].repeat(1) } 20 | rule(:space?) { space.maybe } 21 | 22 | rule(:lparen) { str("(") >> space? } 23 | rule(:rparen) { str(")") >> space?
} 24 | 25 | rule(:and_operator) { str("and") >> space? } 26 | rule(:or_operator) { str("or") >> space? } 27 | 28 | rule(:var) { match["[^\s\(\)]"].repeat(1).as(:var) >> space? } 29 | 30 | # The primary rule deals with parentheses. 31 | rule(:primary) { lparen >> or_operation >> rparen | var } 32 | 33 | # Note that following rules are both right-recursive. 34 | rule(:and_operation) { 35 | (primary.as(:left) >> and_operator >> 36 | and_operation.as(:right)).as(:and) | 37 | primary } 38 | 39 | rule(:or_operation) { 40 | (and_operation.as(:left) >> or_operator >> 41 | or_operation.as(:right)).as(:or) | 42 | and_operation } 43 | 44 | # We start at the lowest precedence rule. 45 | root(:or_operation) 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /lib/expression/variables.rb: -------------------------------------------------------------------------------- 1 | require 'parslet' 2 | 3 | class Expression 4 | class Variables < Parslet::Transform 5 | rule(:var => simple(:var)) { 6 | [String(var)] 7 | } 8 | rule(:or => { :left => subtree(:left), :right => subtree(:right) }) do 9 | (left + right) 10 | end 11 | 12 | rule(:and => { :left => subtree(:left), :right => subtree(:right) }) do 13 | (left + right) 14 | end 15 | 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /lib/nested_hash.rb: -------------------------------------------------------------------------------- 1 | class NestedHash < Hash 2 | 3 | def initialize(hash = {}, default = nil, &block) 4 | default ? super(default) : super(&block) 5 | self.add(hash) 6 | end 7 | 8 | def add(hash) 9 | hash.each do |full_name, value| 10 | parts = full_name.to_s.split('.') 11 | last = parts.length - 1 12 | add_to = self 13 | parts.each_with_index do |name, index| 14 | if index == last 15 | add_to[name] = value 16 | else 17 | add_to[name] ||= {} 18 | add_to = add_to[name] 19 | end 20 | end 21 | end 22 | self 23 | end 24 | 25 | # generate a flat, non-nested hash 26 | # with keys that have dots representing the hierarchy 27 | def withdotkeys(deep_hash = self, flat_hash = {}, root = '') 28 | deep_hash.each do |k, value| 29 | key = root + k 30 | if value.is_a?(Hash) 31 | flat_hash.merge! 
withdotkeys(value, flat_hash, key + '.') 32 | else 33 | flat_hash[key] = value 34 | end 35 | end 36 | flat_hash 37 | end 38 | 39 | # generate a list of the keys with dots representing the hierarchy 40 | def dotkeys(row = self, prefix = '') 41 | human_names = [] 42 | row.keys.each do |k| 43 | key = prefix + k 44 | if row[k].is_a?(Hash) 45 | new_human_names = dotkeys(row[k], key + '.') 46 | human_names += new_human_names 47 | else 48 | human_names << key 49 | end 50 | end 51 | human_names 52 | end 53 | 54 | # set a new or existing nested key's value by a dotted-string key 55 | def dotkey_set(dottedkey, value, deep_hash = self) 56 | keys = dottedkey.to_s.split('.') 57 | first = keys.first 58 | if keys.length == 1 59 | deep_hash[first] = value 60 | else 61 | # in the case that we are creating a hash from a dotted key, we'll assign a default 62 | deep_hash[first] = (deep_hash[first] || {}) 63 | dotkey_set(keys.slice(1..-1).join('.'), value, deep_hash[first]) 64 | end 65 | end 66 | end 67 | -------------------------------------------------------------------------------- /lib/sass_initializer.rb: -------------------------------------------------------------------------------- 1 | module SassInitializer 2 | def self.registered(app) 3 | # Enables support for SASS template reloading in rack applications. 4 | # See http://nex-3.com/posts/88-sass-supports-rack for more details. 5 | # Store SASS files (by default) within 'app/stylesheets'. 6 | require 'sass/plugin/rack' 7 | Sass::Plugin.options[:template_location] = Padrino.root("app/stylesheets") 8 | Sass::Plugin.options[:css_location] = Padrino.root("public/stylesheets") 9 | app.use Sass::Plugin::Rack 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /lib/zipcode/zipcode.rb: -------------------------------------------------------------------------------- 1 | # Zipcode latitude and longitude data in us_zipcodes.txt 2 | # provided by [GeoNames](http://www.geonames.org/) 3 | # under a Creative Commons Attribution 3.0 License: 4 | # http://creativecommons.org/licenses/by/3.0/ 5 | 6 | # this code is in public domain (CC0 1.0) 7 | # https://github.com/18F/open-data-maker/blob/dev/LICENSE.md 8 | 9 | require 'csv' 10 | 11 | class Zipcode 12 | @@zipcode_hash = nil 13 | 14 | def Zipcode.latlon(zipcode) 15 | zipcode = zipcode.to_s 16 | @@zipcode_hash ||= converted_zipcodes 17 | @@zipcode_hash[zipcode] 18 | end 19 | 20 | def Zipcode.valid?(zipcode) 21 | !!self.latlon(zipcode) 22 | end 23 | 24 | private 25 | def self.converted_zipcodes 26 | parsed_file = CSV.read(File.expand_path("../us_zipcodes.txt", __FILE__), { :col_sep => "\t" }) 27 | zipcode_hash = {} 28 | parsed_file.each do |row| 29 | zipcode = row[1] 30 | lat = row[9].to_f 31 | lon = row[10].to_f 32 | zipcode_hash[zipcode] = {'lat': lat, 'lon': lon} 33 | end 34 | zipcode_hash 35 | end 36 | 37 | end 38 | -------------------------------------------------------------------------------- /log/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/log/.gitkeep -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/public/favicon.ico
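A quick usage sketch for NestedHash above (illustrative only; these keys and values are invented, borrowed from names in the fixture data):

require_relative 'lib/nested_hash'
h = NestedHash.new('school.name' => 'Reichert University', 'school.zip' => 35762)
h['school']['name']   # => "Reichert University"
h.dotkeys             # => ["school.name", "school.zip"]
h.withdotkeys         # => {"school.name"=>"Reichert University", "school.zip"=>35762}
h.dotkey_set('school.city', 'Normal')
h['school']['city']   # => "Normal"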
-------------------------------------------------------------------------------- /public/javascripts/application.js: -------------------------------------------------------------------------------- 1 | // Put your application scripts here -------------------------------------------------------------------------------- /public/javascripts/jquery-ujs.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Padrino Javascript Jquery Adapter 3 | * Created for use with Padrino Ruby Web Framework (http://www.padrinorb.com) 4 | **/ 5 | 6 | /* Remote Form Support 7 | * form_for @user, '/user', :remote => true 8 | **/ 9 | 10 | $(function(){ 11 | $('form').on('submit', function(e) { 12 | var element = $(this), message = element.data('confirm'); 13 | if (message && !confirm(message)) { return false; } 14 | if (element.data('remote') == true) { 15 | e.preventDefault(); e.stopped = true; 16 | JSAdapter.sendRequest(element, { 17 | verb: element.data('method') || element.attr('method') || 'post', 18 | url: element.attr('action'), 19 | dataType: element.data('type') || ($.ajaxSettings && $.ajaxSettings.dataType) || 'script', 20 | params: element.serializeArray() 21 | }); 22 | } 23 | }); 24 | 25 | /* Confirmation Support 26 | * link_to 'sign out', '/logout', :confirm => 'Log out?' 27 | **/ 28 | 29 | $(document).on('click', 'a[data-confirm]', function(e) { 30 | var message = $(this).data('confirm'); 31 | if (!confirm(message)) { e.preventDefault(); e.stopped = true; } 32 | }); 33 | 34 | /* 35 | * Link Remote Support 36 | * link_to 'add item', '/create', :remote => true 37 | **/ 38 | 39 | $(document).on('click', 'a[data-remote=true]', function(e) { 40 | var element = $(this); 41 | if (e.stopped) return; 42 | e.preventDefault(); e.stopped = true; 43 | JSAdapter.sendRequest(element, { 44 | verb: element.data('method') || 'get', 45 | url: element.attr('href') 46 | }); 47 | }); 48 | 49 | /* 50 | * Link Method Support 51 | * link_to 'delete item', '/destroy', :method => :delete 52 | **/ 53 | 54 | $(document).on('click', 'a[data-method]:not([data-remote])', function(e) { 55 | if (e.stopped) return; 56 | JSAdapter.sendMethod($(this)); 57 | e.preventDefault(); e.stopped = true; 58 | }); 59 | 60 | /* JSAdapter */ 61 | var JSAdapter = { 62 | // Sends an xhr request to the specified url with given verb and params 63 | // JSAdapter.sendRequest(element, { verb: 'put', url : '...', params: {} }); 64 | sendRequest: function(element, options) { 65 | var verb = options.verb, url = options.url, params = options.params, dataType = options.dataType; 66 | var event = element.trigger('ajax:before'); 67 | if (event.stopped) return false; 68 | $.ajax({ 69 | url: url, 70 | type: verb.toUpperCase() || 'POST', 71 | data: params || [], 72 | dataType: dataType, 73 | 74 | beforeSend: function(request) { element.trigger('ajax:loading', [ request ]); }, 75 | complete: function(request) { element.trigger('ajax:complete', [ request ]); }, 76 | success: function(request) { element.trigger('ajax:success', [ request ]); }, 77 | error: function(request) { element.trigger('ajax:failure', [ request ]); } 78 | }); 79 | element.trigger('ajax:after'); 80 | }, 81 | // Triggers a particular method verb to be triggered in a form posting to the url 82 | // JSAdapter.sendMethod(element); 83 | sendMethod: function(element) { 84 | var verb = element.data('method'); 85 | var url = element.attr('href'); 86 | var form = $('<form method="post" action="'+url+'"></form>'); 87 | var csrf_token = $('meta[name=csrf-token]').attr('content'); 88 | var csrf_param = $('meta[name=csrf-param]').attr('content'); 89 | form.hide().appendTo('body'); 90 | if (verb !== 'post') { 91 | var field = '<input type="hidden" name="_method" value="'+verb+'" />'; 92 | form.append(field); 93 | } 94 | if (csrf_param !== undefined && csrf_token !== undefined) { 95 | var field = '<input type="hidden" name="'+csrf_param+'" value="'+csrf_token+'" />'; 96 | form.append(field); 97 | } 98 | form.submit(); 99 | } 100 | }; 101 | 102 | // Every xhr request is sent along with the CSRF token. 103 | $.ajaxPrefilter(function(options, originalOptions, xhr) { 104 | if (options.verb !== 'GET') { 105 | var token = $('meta[name="csrf-token"]').attr('content'); 106 | if (token) xhr.setRequestHeader('X-CSRF-Token', token); 107 | } 108 | }); 109 | }); 110 | -------------------------------------------------------------------------------- /sample-data/data.yaml: -------------------------------------------------------------------------------- 1 | version: cities100-2010 2 | # cities100.txt 3 | # National Places Gazetteer Files, from US Census 2010 4 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 5 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 6 | # head -n 101 results.txt > cities100.txt 7 | # then converted to csv and removed " city" from after each city name 8 | index: city-data 9 | api: cities 10 | unique: ["name"] 11 | 12 | options: 13 | search: dictionary_only # API provides error when requesting fields not in dictionary 14 | 15 | dictionary: 16 | id: 17 | source: GEOID 18 | description: > 19 | Geographic Identifier - fully concatenated geographic code (State FIPS and 20 | County FIPS). The Census Bureau and other state and federal agencies are 21 | responsible for assigning geographic identifiers, or GEOIDs, to geographic 22 | entities to facilitate the organization, presentation, and 23 | exchange of geographic and statistical data. GEOIDs are numeric codes that 24 | uniquely identify all administrative/legal and statistical geographic areas for 25 | which the Census Bureau tabulates data. From Alaska, the largest state, 26 | to the smallest census block in New York City, every geographic area 27 | has a unique GEOID. Data users rely on GEOIDs to join the appropriate 28 | demographic data from censuses and surveys, such as the 29 | American Community Survey (ACS), to various levels of geography for data 30 | analysis, interpretation and mapping. 31 | code: 32 | source: ANSICODE 33 | description: > 34 | American National Standards Institute codes (ANSI codes) 35 | are standardized numeric or alphabetic codes issued by the American 36 | National Standards Institute (ANSI) to ensure uniform identification of 37 | geographic entities through all federal government agencies. 38 | name: 39 | source: NAME 40 | description: The name of the city 41 | type: literal 42 | state: 43 | source: USPS 44 | description: Two letter state abbreviation 45 | population: 46 | source: POP10 47 | description: City population from 2010 Census data 48 | type: integer 49 | location.lat: INTPTLAT 50 | location.lon: INTPTLONG 51 | land_area: 52 | source: ALAND_SQMI 53 | description: Land Area (square miles) 54 | type: float 55 | area.water: 56 | description: Water Area (square miles) 57 | source: AWATER_SQMI 58 | type: float 59 | 60 | categories: 61 | general: 62 | title: General 63 | description: > 64 | general information about the city, including standard 65 | identifiers and actual census summary data about the population of the city.
66 | fields: [id, code, name, state, population] 67 | geographic: 68 | title: Geographic 69 | description: > 70 | Geographic characteristics of the area. These are created for 71 | statistical purposes only. Depiction and designation for statistical 72 | purposes does not constitute a determination of jurisdictional authority 73 | or rights of ownership or entitlement. 74 | fields: [location, area.land, area.water] 75 | general2: 76 | title: General2 77 | description: > 78 | general information about the city, including standard 79 | identifiers and actual census summary data about the population of the city. 80 | fields: [id, code, name, state, population] 81 | general3: 82 | title: General3 83 | description: > 84 | short 85 | fields: [id, code, name, state, population] 86 | general4: 87 | title: General4 88 | description: > 89 | short 90 | fields: [id, code, name, state, population] 91 | general5: 92 | title: General5 93 | description: > 94 | general information about the city, including standard 95 | identifiers and actual census summary data about the population of the city. 96 | fields: [id, code, name, state, population] 97 | 98 | files: 99 | - name: cities100.csv 100 | -------------------------------------------------------------------------------- /script/bomstrip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | mkdir -p new 3 | 4 | for filename in ./*.csv; do 5 | awk 'NR==1{sub(/^\xef\xbb\xbf/,"")}1' "$filename" > new/$filename 6 | done 7 | 8 | #find . -print0 -type f | awk 'NR==1{sub(/^\xef\xbb\xbf/,"")}1' {} > new/{} 9 | -------------------------------------------------------------------------------- /script/bootstrap: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | fancy_echo() { 6 | local fmt="$1"; shift 7 | 8 | # shellcheck disable=SC2059 9 | printf "\n$fmt\n" "$@" 10 | } 11 | 12 | brew_install_or_upgrade() { 13 | if brew_is_installed "$1"; then 14 | if brew_is_upgradable "$1"; then 15 | fancy_echo "Upgrading %s ..." "$1" 16 | brew upgrade "$@" 17 | else 18 | fancy_echo "Already using the latest version of %s. Skipping ..." "$1" 19 | fi 20 | else 21 | fancy_echo "Installing %s ..." "$1" 22 | brew install "$@" 23 | fi 24 | } 25 | 26 | brew_is_installed() { 27 | brew list -1 | grep -Fqx "$1" 28 | } 29 | 30 | brew_is_upgradable() { 31 | ! brew outdated --quiet "$1" >/dev/null 32 | } 33 | 34 | brew_tap_is_installed() { 35 | brew tap | grep -Fqx "$1" 36 | } 37 | 38 | brew_tap() { 39 | if ! brew_tap_is_installed "$1"; then 40 | fancy_echo "Tapping $1..." 41 | brew tap "$1" 2> /dev/null 42 | fi 43 | } 44 | 45 | echo 'Installing dependencies...' 46 | 47 | if command -v brew >/dev/null; then 48 | brew update 49 | 50 | brew_tap 'homebrew/services' 51 | brew_tap 'homebrew/versions' 52 | brew_install_or_upgrade 'elasticsearch17' 53 | 54 | brew services restart elasticsearch17 55 | 56 | # elasticsearch takes several seconds to load 57 | sleep 10 58 | fi 59 | 60 | gem install bundler --conservative 61 | bundle check || bundle install 62 | 63 | echo "All done!" 
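# usage: script/bootstrap
# installs or upgrades elasticsearch17 via Homebrew, restarts it, waits ~10s for it to boot, then installs the bundled gems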
64 | -------------------------------------------------------------------------------- /script/makeutf8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # convert to utf8 and strip Byte Order Mark (BOM) if present 3 | mkdir -p utf8 4 | 5 | for file in *.csv; do 6 | echo "$file" 7 | iconv -f ascii -t utf-8 "$file" | awk 'NR==1{sub(/^\xef\xbb\xbf/,"")}1' > "./utf8/$file" 8 | done 9 | -------------------------------------------------------------------------------- /script/s3config.rb: -------------------------------------------------------------------------------- 1 | # configure S3 with local credentials based on environment 2 | # usage (from ruby script or irb): 3 | # require 's3config.rb' 4 | # @s3 = ::Aws::S3::Client.new 5 | 6 | require 'dotenv' 7 | 8 | branch = `echo $(git symbolic-ref --short HEAD)`.chomp 9 | 10 | if ENV['APP_ENV'] 11 | APP_ENV = ENV['APP_ENV'] 12 | puts "using APP_ENV from environment #{APP_ENV}" 13 | else 14 | case branch 15 | when "master" 16 | APP_ENV = "production" 17 | when "staging" 18 | APP_ENV = "staging" 19 | else 20 | puts "not on master or staging branch lets use dev" 21 | APP_ENV = "dev" 22 | end 23 | end 24 | 25 | Dotenv.load( 26 | File.expand_path("../../.#{APP_ENV}.env", __FILE__), 27 | File.expand_path("../../.env", __FILE__)) 28 | 29 | require 'aws-sdk' 30 | puts "app env: #{APP_ENV}" 31 | puts "bucket name: #{ENV['s3_bucket']}" 32 | 33 | 34 | s3cred = {'access_key'=> ENV['s3_access_key'], 'secret_key' => ENV['s3_secret_key']} 35 | 36 | ::Aws.config[:credentials] = ::Aws::Credentials.new(s3cred['access_key'], s3cred['secret_key']) 37 | ::Aws.config[:region] = 'us-east-1' 38 | -------------------------------------------------------------------------------- /script/s3pull: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby -v 2 | 3 | require_relative 's3config.rb' 4 | 5 | @s3 = ::Aws::S3::Client.new 6 | 7 | bucket = ENV['s3_bucket'] 8 | 9 | dirname = 'real-data' 10 | unless File.directory?(dirname) 11 | FileUtils.mkdir_p(dirname) 12 | end 13 | datayamlpath = File.expand_path("../../#{dirname}/#{bucket}.yaml", __FILE__) 14 | 15 | File.open(datayamlpath, 'w') do |file| 16 | response = @s3.get_object(bucket: bucket, key: 'data.yaml') 17 | file << response.body.read 18 | end 19 | -------------------------------------------------------------------------------- /script/s3push: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby -v 2 | 3 | require_relative 's3config.rb' 4 | 5 | @s3 = ::Aws::S3::Client.new 6 | 7 | dirname = 'real-data' 8 | bucket_name = ENV['s3_bucket'] 9 | datayamlpath = File.expand_path("../../#{dirname}/#{bucket_name}.yaml", __FILE__) 10 | 11 | puts "copying #{datayamlpath}" 12 | puts "to S3 #{bucket_name}" 13 | File.open(datayamlpath, 'r') do |file| 14 | @s3.put_object(bucket: bucket_name, key: 'data.yaml', body: file) 15 | end 16 | -------------------------------------------------------------------------------- /spec/features/web_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'app', type: 'feature' do 4 | before do 5 | DataMagic.destroy 6 | ENV['DATA_PATH'] = './spec/fixtures/sample-data' 7 | DataMagic.init(load_now: true) 8 | end 9 | 10 | after do 11 | DataMagic.destroy 12 | end 13 | 14 | it "should load the home page" do 15 | get '/' 16 | expect(last_response).to be_ok 17 | end 18 | 19 | it
"should display links to endpoints" do 20 | get '/' 21 | expect(last_response.body).to include 'cities' 22 | end 23 | 24 | it "should display a list of categories" do 25 | get '/' 26 | expect(last_response.body).to include('Browse Data Details by Category') 27 | expect(last_response.body).to include('General') # category name 28 | expect(last_response.body).to include('general information about the city, including standard identifiers') 29 | end 30 | 31 | it "should load the correct category page" do 32 | get '/category/general' 33 | expect(last_response.body).to include('Data Details for the') 34 | expect(last_response.body).to include('category_entry = {"title":"General"') 35 | expect(last_response.body).to include('population') # a field name 36 | expect(last_response.body).to include('The name of the city') # a field description 37 | expect(last_response.body).to include('literal') # field type 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /spec/fixtures/bom/bom.csv: -------------------------------------------------------------------------------- 1 | UNITID,VAL 2 | 100654,00100200 3 | 100663,00105200 4 | 100690,02503400 5 | 100706,00105500 6 | 100724,00100500 7 | -------------------------------------------------------------------------------- /spec/fixtures/bom/data.yaml: -------------------------------------------------------------------------------- 1 | version: byte-order-mark 2 | index: test-data 3 | api: test 4 | dictionary: 5 | id: UNITID 6 | value: VAL 7 | 8 | files: 9 | - name: bom.csv 10 | -------------------------------------------------------------------------------- /spec/fixtures/calculated_columns/data.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: Aug6-2015-08-10-23:48-0600 3 | api: fakeschool 4 | index: fakeschool-data 5 | unique: 6 | - id 7 | options: 8 | limit_files: 1 9 | limit_rows: 100 10 | 11 | dictionary: 12 | id: 13 | source: UNITID 14 | type: integer 15 | description: Unit ID for institution 16 | school.name: 17 | source: INSTNM 18 | description: Institution name 19 | integer1: 20 | source: INT1 21 | type: integer 22 | integer2: 23 | source: INT2 24 | type: integer 25 | integer3: 26 | source: INT3 27 | type: integer 28 | integer4: 29 | source: INT4 30 | type: integer 31 | summarybool: 32 | calculate: INT1 or INT2 or INT3 or INT4 33 | type: boolean 34 | description: are any of the unparsed booleans true? 
35 | 36 | files: 37 | - name: schools.csv 38 | -------------------------------------------------------------------------------- /spec/fixtures/calculated_columns/schools.csv: -------------------------------------------------------------------------------- 1 | UNITID,INSTNM,INT1,INT2,INT3,INT4 2 | 1,Big School,0,0,2,0 3 | 2,Small School,0,0,0,0 4 | 3,Middle School,0,1,1,0 5 | -------------------------------------------------------------------------------- /spec/fixtures/cities_with_yml/cities51-100.csv: -------------------------------------------------------------------------------- 1 | "USPS","GEOID","ANSICODE","NAME","LSAD","FUNCSTAT","POP10","HU10","ALAND","AWATER","ALAND_SQMI","AWATER_SQMI","INTPTLAT","INTPTLONG" 2 | "TX","4804000","02409731","Arlington","25","A","365438","144805","248332497","9690024","95.882","3.741","32.700708","-97.124691" 3 | "CA","0603526","02409774","Bakersfield","25","A","347483","120725","368204317","3741691","142.164","1.445","35.321213","-119.018291" 4 | "LA","2255000","00545142","New Orleans","25","A","343829","189896","438803381","468240430","169.423","180.789","30.068636","-89.939007" 5 | "HI","1571550","02630783","Urban Honolulu CDP","57","S","337256","143173","156748036","20484151","60.521","7.909","21.325852","-157.845315" 6 | "CA","0602000","02409704","Anaheim","25","A","336265","104237","129073275","2526668","49.835","0.976","33.855497","-117.760071" 7 | "FL","1271000","02405568","Tampa","25","A","335709","157130","293727878","160127838","113.409","61.826","27.970086","-82.479673" 8 | "CO","0804000","02409757","Aurora","25","A","325078","131040","400759192","1806832","154.734","0.698","39.688002","-104.689740" 9 | "CA","0669000","02411814","Santa Ana","25","A","324528","76896","70627761","643479","27.270","0.248","33.736478","-117.882593" 10 | "MO","2965000","00767557","St. Louis","25","A","319294","176002","160343174","10683076","61.909","4.125","38.635699","-90.244582" 11 | "PA","4261000","01214818","Pittsburgh","25","A","305704","156165","143399923","7693613","55.367","2.971","40.439753","-79.976592" 12 | "TX","4817000","02410234","Corpus Christi","25","A","305215","125469","415982136","852055055","160.612","328.980","27.754252","-97.173385" 13 | "CA","0662000","02410965","Riverside","25","A","303871","98444","210152356","788400","81.140","0.304","33.938143","-117.393168" 14 | "OH","3915000","01086201","Cincinnati","25","A","296943","161095","201869928","4155439","77.942","1.604","39.139902","-84.506446" 15 | "KY","2146027","02405089","Lexington-Fayette urban county","UC","A","295803","135160","734648526","4922803","283.649","1.901","38.040157","-84.458443" 16 | "AK","0203000","02419025","Anchorage municipality","37","A","291826","113032","4415108963","663860984","1704.683","256.318","61.177549","-149.274354" 17 | "CA","0675000","02411987","Stockton","25","A","291707","99637","159723404","7984682","61.670","3.083","37.976342","-121.313304" 18 | "OH","3977000","01086537","Toledo","25","A","287208","138039","208991246","8889079","80.692","3.432","41.664071","-83.581861" 19 | "MN","2758000","02396511","St. 
Paul","25","A","285068","120795","134623737","10875208","51.979","4.199","44.948869","-93.103855" 20 | "NJ","3451000","00885317","Newark","25","A","277140","109520","62643850","4972876","24.187","1.920","40.724220","-74.172574" 21 | "NC","3728000","02403745","Greensboro","25","A","269666","124074","327673360","13690607","126.515","5.286","36.096483","-79.827108" 22 | "NY","3611000","00978764","Buffalo","25","A","261310","133444","104594197","31364094","40.384","12.110","42.892492","-78.859686" 23 | "TX","4858016","02411437","Plano","25","A","259841","103672","185394655","937663","71.581","0.362","33.050769","-96.747944" 24 | "NE","3128000","02395713","Lincoln","25","A","258379","110546","230804010","3229386","89.114","1.247","40.808957","-96.680354" 25 | "NV","3231900","02410741","Henderson","25","A","257729","113586","279023542","0","107.732","0.000","36.012233","-115.037462" 26 | "IN","1825000","02394798","Fort Wayne","25","A","253691","113541","286500436","553423","110.618","0.214","41.088173","-85.143880" 27 | "NJ","3436000","00885264","Jersey","25","A","247597","108720","38315542","16280557","14.794","6.286","40.711417","-74.064760" 28 | "FL","1263000","02405401","St. Petersburg","25","A","244769","129401","159909751","196473878","61.742","75.859","27.761976","-82.644055" 29 | "CA","0613392","02409461","Chula Vista","25","A","243916","79416","128544675","6380068","49.631","2.463","32.627670","-117.015170" 30 | "VA","5157000","01498557","Norfolk","25","A","242803","95018","140171293","109376999","54.120","42.231","36.923015","-76.244641" 31 | "FL","1253000","02404443","Orlando","25","A","238300","121254","265203107","21469603","102.395","8.289","28.415886","-81.298750" 32 | "AZ","0412000","02409433","Chandler","25","A","236123","94404","166828220","289715","64.413","0.112","33.282874","-111.854943" 33 | "TX","4841464","02411626","Laredo","25","A","236091","68610","230271380","3754983","88.908","1.450","27.547681","-99.486931" 34 | "WI","5548000","01583625","Madison","25","A","233209","108843","198882058","44658619","76.789","17.243","43.087806","-89.430121" 35 | "NC","3775000","02405771","Winston-Salem","25","A","229617","103974","343041264","3228612","132.449","1.247","36.103262","-80.260578" 36 | "TX","4845000","02410892","Lubbock","25","A","229573","95926","317041399","2962034","122.410","1.144","33.566479","-101.886677" 37 | "LA","2205000","02403821","Baton Rouge","25","B","229493","100801","199291656","5588234","76.947","2.158","30.448454","-91.125899" 38 | "NC","3719000","02403521","Durham","25","A","228330","103221","278087581","2357401","107.370","0.910","35.980964","-78.905647" 39 | "TX","4829000","02410572","Garland","25","A","226876","80834","147848881","340126","57.085","0.131","32.909826","-96.630357" 40 | "AZ","0427820","02410596","Glendale","25","A","226721","90505","155337275","401624","59.976","0.155","33.533111","-112.189901" 41 | "NV","3260600","02410923","Reno","25","A","225221","102582","266792840","7423507","103.009","2.866","39.474487","-119.776538" 42 | "FL","1230000","02404689","Hialeah","25","A","224669","74067","55554697","3599730","21.450","1.390","25.869941","-80.302865" 43 | "NV","3254600","02409023","Paradise CDP","57","S","223167","114296","120996826","0","46.717","0.000","36.080689","-115.136839" 44 | "VA","5116000","01498558","Chesapeake","25","A","222209","83196","882669156","26052854","340.800","10.059","36.679376","-76.301788" 45 | 
"AZ","0465000","02411845","Scottsdale","25","A","217385","124001","476350341","1231086","183.920","0.475","33.668727","-111.823682" 46 | "NV","3251800","02411273","North Las Vegas","25","A","216961","76073","262483131","112001","101.345","0.043","36.282974","-115.089262" 47 | "TX","4837000","02410117","Irving","25","A","216290","91128","173573892","2594600","67.017","1.002","32.857748","-96.970022" 48 | "CA","0626000","02410545","Fremont","25","A","214089","73989","200617968","26291598","77.459","10.151","37.494373","-121.941117" 49 | "CA","0636770","02410116","Irvine","25","A","212375","83899","171214072","900908","66.106","0.348","33.678399","-117.771254" 50 | "AL","0107000","02403868","Birmingham","25","A","212237","108981","378310927","6590665","146.067","2.545","33.527444","-86.799047" 51 | "NY","3663000","00979426","Rochester","25","A","210565","97158","92671789","3558427","35.781","1.374","43.169927","-77.616891" 52 | -------------------------------------------------------------------------------- /spec/fixtures/cities_with_yml/data.yml: -------------------------------------------------------------------------------- 1 | # cities100.txt 2 | # National Places Gazetteer Files, from US Census 2010 3 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 4 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 5 | # head -n 101 results.txt > cities100.txt 6 | # then convertes to csv and removed " city" from after each city name 7 | version: fixture-import-all 8 | index: city-data 9 | api: cities 10 | global_mapping: 11 | USPS: state 12 | NAME: name 13 | POP10: population 14 | INTPTLAT: latitude 15 | INTPTLONG: longitude 16 | 17 | files: 18 | - name: cities50.csv 19 | add: 20 | category: 'top50' 21 | - name: cities51-100.csv 22 | -------------------------------------------------------------------------------- /spec/fixtures/cities_with_yml/more.csv: -------------------------------------------------------------------------------- 1 | state,city,lat,lon 2 | CA,Secret City,37.727239,-123.032229 3 | -------------------------------------------------------------------------------- /spec/fixtures/cities_without_yml/cities51-100.csv: -------------------------------------------------------------------------------- 1 | "USPS","GEOID","ANSICODE","NAME","LSAD","FUNCSTAT","POP10","HU10","ALAND","AWATER","ALAND_SQMI","AWATER_SQMI","INTPTLAT","INTPTLONG" 2 | "TX","4804000","02409731","Arlington","25","A","365438","144805","248332497","9690024","95.882","3.741","32.700708","-97.124691" 3 | "CA","0603526","02409774","Bakersfield","25","A","347483","120725","368204317","3741691","142.164","1.445","35.321213","-119.018291" 4 | "LA","2255000","00545142","New Orleans","25","A","343829","189896","438803381","468240430","169.423","180.789","30.068636","-89.939007" 5 | "HI","1571550","02630783","Urban Honolulu CDP","57","S","337256","143173","156748036","20484151","60.521","7.909","21.325852","-157.845315" 6 | "CA","0602000","02409704","Anaheim","25","A","336265","104237","129073275","2526668","49.835","0.976","33.855497","-117.760071" 7 | "FL","1271000","02405568","Tampa","25","A","335709","157130","293727878","160127838","113.409","61.826","27.970086","-82.479673" 8 | "CO","0804000","02409757","Aurora","25","A","325078","131040","400759192","1806832","154.734","0.698","39.688002","-104.689740" 9 | "CA","0669000","02411814","Santa Ana","25","A","324528","76896","70627761","643479","27.270","0.248","33.736478","-117.882593" 10 | 
"MO","2965000","00767557","St. Louis","25","A","319294","176002","160343174","10683076","61.909","4.125","38.635699","-90.244582" 11 | "PA","4261000","01214818","Pittsburgh","25","A","305704","156165","143399923","7693613","55.367","2.971","40.439753","-79.976592" 12 | "TX","4817000","02410234","Corpus Christi","25","A","305215","125469","415982136","852055055","160.612","328.980","27.754252","-97.173385" 13 | "CA","0662000","02410965","Riverside","25","A","303871","98444","210152356","788400","81.140","0.304","33.938143","-117.393168" 14 | "OH","3915000","01086201","Cincinnati","25","A","296943","161095","201869928","4155439","77.942","1.604","39.139902","-84.506446" 15 | "KY","2146027","02405089","Lexington-Fayette urban county","UC","A","295803","135160","734648526","4922803","283.649","1.901","38.040157","-84.458443" 16 | "AK","0203000","02419025","Anchorage municipality","37","A","291826","113032","4415108963","663860984","1704.683","256.318","61.177549","-149.274354" 17 | "CA","0675000","02411987","Stockton","25","A","291707","99637","159723404","7984682","61.670","3.083","37.976342","-121.313304" 18 | "OH","3977000","01086537","Toledo","25","A","287208","138039","208991246","8889079","80.692","3.432","41.664071","-83.581861" 19 | "MN","2758000","02396511","St. Paul","25","A","285068","120795","134623737","10875208","51.979","4.199","44.948869","-93.103855" 20 | "NJ","3451000","00885317","Newark","25","A","277140","109520","62643850","4972876","24.187","1.920","40.724220","-74.172574" 21 | "NC","3728000","02403745","Greensboro","25","A","269666","124074","327673360","13690607","126.515","5.286","36.096483","-79.827108" 22 | "NY","3611000","00978764","Buffalo","25","A","261310","133444","104594197","31364094","40.384","12.110","42.892492","-78.859686" 23 | "TX","4858016","02411437","Plano","25","A","259841","103672","185394655","937663","71.581","0.362","33.050769","-96.747944" 24 | "NE","3128000","02395713","Lincoln","25","A","258379","110546","230804010","3229386","89.114","1.247","40.808957","-96.680354" 25 | "NV","3231900","02410741","Henderson","25","A","257729","113586","279023542","0","107.732","0.000","36.012233","-115.037462" 26 | "IN","1825000","02394798","Fort Wayne","25","A","253691","113541","286500436","553423","110.618","0.214","41.088173","-85.143880" 27 | "NJ","3436000","00885264","Jersey","25","A","247597","108720","38315542","16280557","14.794","6.286","40.711417","-74.064760" 28 | "FL","1263000","02405401","St. 
Petersburg","25","A","244769","129401","159909751","196473878","61.742","75.859","27.761976","-82.644055" 29 | "CA","0613392","02409461","Chula Vista","25","A","243916","79416","128544675","6380068","49.631","2.463","32.627670","-117.015170" 30 | "VA","5157000","01498557","Norfolk","25","A","242803","95018","140171293","109376999","54.120","42.231","36.923015","-76.244641" 31 | "FL","1253000","02404443","Orlando","25","A","238300","121254","265203107","21469603","102.395","8.289","28.415886","-81.298750" 32 | "AZ","0412000","02409433","Chandler","25","A","236123","94404","166828220","289715","64.413","0.112","33.282874","-111.854943" 33 | "TX","4841464","02411626","Laredo","25","A","236091","68610","230271380","3754983","88.908","1.450","27.547681","-99.486931" 34 | "WI","5548000","01583625","Madison","25","A","233209","108843","198882058","44658619","76.789","17.243","43.087806","-89.430121" 35 | "NC","3775000","02405771","Winston-Salem","25","A","229617","103974","343041264","3228612","132.449","1.247","36.103262","-80.260578" 36 | "TX","4845000","02410892","Lubbock","25","A","229573","95926","317041399","2962034","122.410","1.144","33.566479","-101.886677" 37 | "LA","2205000","02403821","Baton Rouge","25","B","229493","100801","199291656","5588234","76.947","2.158","30.448454","-91.125899" 38 | "NC","3719000","02403521","Durham","25","A","228330","103221","278087581","2357401","107.370","0.910","35.980964","-78.905647" 39 | "TX","4829000","02410572","Garland","25","A","226876","80834","147848881","340126","57.085","0.131","32.909826","-96.630357" 40 | "AZ","0427820","02410596","Glendale","25","A","226721","90505","155337275","401624","59.976","0.155","33.533111","-112.189901" 41 | "NV","3260600","02410923","Reno","25","A","225221","102582","266792840","7423507","103.009","2.866","39.474487","-119.776538" 42 | "FL","1230000","02404689","Hialeah","25","A","224669","74067","55554697","3599730","21.450","1.390","25.869941","-80.302865" 43 | "NV","3254600","02409023","Paradise CDP","57","S","223167","114296","120996826","0","46.717","0.000","36.080689","-115.136839" 44 | "VA","5116000","01498558","Chesapeake","25","A","222209","83196","882669156","26052854","340.800","10.059","36.679376","-76.301788" 45 | "AZ","0465000","02411845","Scottsdale","25","A","217385","124001","476350341","1231086","183.920","0.475","33.668727","-111.823682" 46 | "NV","3251800","02411273","North Las Vegas","25","A","216961","76073","262483131","112001","101.345","0.043","36.282974","-115.089262" 47 | "TX","4837000","02410117","Irving","25","A","216290","91128","173573892","2594600","67.017","1.002","32.857748","-96.970022" 48 | "CA","0626000","02410545","Fremont","25","A","214089","73989","200617968","26291598","77.459","10.151","37.494373","-121.941117" 49 | "CA","0636770","02410116","Irvine","25","A","212375","83899","171214072","900908","66.106","0.348","33.678399","-117.771254" 50 | "AL","0107000","02403868","Birmingham","25","A","212237","108981","378310927","6590665","146.067","2.545","33.527444","-86.799047" 51 | "NY","3663000","00979426","Rochester","25","A","210565","97158","92671789","3558427","35.781","1.374","43.169927","-77.616891" 52 | -------------------------------------------------------------------------------- /spec/fixtures/cities_without_yml/more.csv: -------------------------------------------------------------------------------- 1 | state,city,lat,lon 2 | CA,Secret City,37.727239,-123.032229 3 | -------------------------------------------------------------------------------- 
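The fixture directories above are exercised the same way throughout the specs; a minimal sketch following the pattern in spec/features/web_spec.rb (only the path differs):

DataMagic.destroy
ENV['DATA_PATH'] = './spec/fixtures/cities_without_yml'
DataMagic.init(load_now: true) # cities_without_yml has no data.yaml; see spec/lib/data_magic/import_without_data_yaml_spec.rb for the expected defaults
DataMagic.destroy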
/spec/fixtures/data.rb: -------------------------------------------------------------------------------- 1 | # Ages adjusted for Springfield residents to average to 42 2 | # Heights randomly set to generate a max of 142 3 | def address_data 4 | @address_data ||= StringIO.new <<-eos 5 | name,address,city,age,height 6 | Paul,15 Penny Lane,Liverpool,10,142 7 | Michelle,600 Pennsylvania Avenue,Washington,12,1 8 | Marilyn,1313 Mockingbird Lane,Springfield,14,2 9 | Sherlock,221B Baker Street,London,16,123 10 | Clark,66 Lois Lane,Smallville,18,141 11 | Bart,742 Evergreen Terrace,Springfield,70,142 12 | Paul,19 N Square,Boston,70,55.2 13 | Peter,66 Parker Lane,New York,74,11.5123 14 | eos 15 | @address_data.rewind 16 | @address_data 17 | end 18 | 19 | def geo_data 20 | @geo_data ||= StringIO.new <<-eos 21 | state,city,lat,lon 22 | CA,San Francisco,37.727239,-123.032229 23 | NY,"New York",40.664274,-73.938500 24 | CA,"Los Angeles",34.019394,-118.410825 25 | IL,Chicago,41.837551,-87.681844 26 | TX,Houston,29.780472,-95.386342 27 | PA,Philadelphia,40.009376,-75.133346 28 | CA,"San Jose",37.296867,-121.819306 29 | MA,Boston,42.331960,-71.020173 30 | WA,Seattle,47.620499,-122.350876 31 | eos 32 | @geo_data.rewind 33 | @geo_data 34 | end 35 | -------------------------------------------------------------------------------- /spec/fixtures/geo/data.yaml: -------------------------------------------------------------------------------- 1 | 2 | 3 | # cities100.txt 4 | # National Places Gazetteer Files, from US Census 2010 5 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 6 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 7 | # head -n 101 results.txt > cities100.txt 8 | # then converted to csv and removed " city" from after each city name 9 | dictionary: 10 | city: city 11 | location.lat: lat 12 | location.lon: lon 13 | 14 | index: place-data 15 | api: places 16 | files: 17 | - name: places.csv 18 | -------------------------------------------------------------------------------- /spec/fixtures/geo/places.csv: -------------------------------------------------------------------------------- 1 | state,city,lat,lon 2 | CA,"San Francisco",37.727239,-123.032229 3 | NY,"New York",40.664274,-73.938500 4 | CA,"Los Angeles",34.019394,-118.410825 5 | IL,Chicago,41.837551,-87.681844 6 | TX,Houston,29.780472,-95.386342 7 | PA,Philadelphia,40.009376,-75.133346 8 | CA,"San Jose",37.296867,-121.819306 9 | MA,Boston,42.331960,-71.020173 10 | WA,Seattle,47.620499,-122.350876 11 | -------------------------------------------------------------------------------- /spec/fixtures/geo_no_files/data.yaml: -------------------------------------------------------------------------------- 1 | # data.yaml for geo tests 2 | index: place-data 3 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_dictionary/data.yaml: -------------------------------------------------------------------------------- 1 | # cities100.txt 2 | # National Places Gazetteer Files, from US Census 2010 3 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 4 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 5 | # head -n 101 results.txt > cities100.txt 6 | # then converted to csv and removed " city" from after each city name 7 | version: fixture-import-all 8 | index: city-data 9 | api: cities 10 | dictionary: 11 | state: USPS 12 | name: NAME 13 | population: POP10 14 |
latitude: INTPTLAT 15 | longitude: INTPTLONG 16 | 17 | files: 18 | - name: cities50.csv 19 | add: 20 | category: 'top50' 21 | - name: cities51-100.csv 22 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_dictionary/more.csv: -------------------------------------------------------------------------------- 1 | state,city,lat,lon 2 | CA,Secret City,37.727239,-123.032229 3 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_errors/cities4.csv: -------------------------------------------------------------------------------- 1 | USPS,GEOID,ANSICODE,NAME,POP10 2 | NY,3651000,2395220,New York,8175133 3 | CA,644000,2410877,Los Angeles,3792621 4 | IL,1714000,428803,Chicago,2695598 5 | TX,4835000,2410796,Houston,2099451 -------------------------------------------------------------------------------- /spec/fixtures/import_with_errors/data.yaml: -------------------------------------------------------------------------------- 1 | version: fixture-type-error 2 | index: expect-errors 3 | api: nothing 4 | 5 | dictionary: 6 | state: USPS 7 | name: NAME 8 | population: 9 | source: POP10 10 | type: broken 11 | 12 | files: 13 | - name: cities4.csv 14 | add: 15 | year: 2010 16 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_null_value/data.yaml: -------------------------------------------------------------------------------- 1 | index: city-data 2 | api: cities 3 | unique: ['name'] 4 | null_value: 'abc123' 5 | options: 6 | columns: all 7 | 8 | dictionary: 9 | state: USPS 10 | population: POP10 11 | name: NAME 12 | 13 | files: 14 | - name: null_values.csv 15 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_null_value/null_values.csv: -------------------------------------------------------------------------------- 1 | USPS,GEOID,ANSICODE,NAME,POP10 2 | NY,abc123,2395220,New York,8175133 3 | CA,644000,2410877,Los Angeles,3792621 4 | IL,1714000,428803,Chicago,2695598 5 | TX,4835000,2410796,Houston,2099451 6 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_options/cities4.csv: -------------------------------------------------------------------------------- 1 | USPS,GEOID,ANSICODE,NAME,POP10 2 | NY,3651000,2395220,New York,8175133 3 | CA,644000,2410877,Los Angeles,3792621 4 | IL,1714000,428803,Chicago,2695598 5 | TX,4835000,2410796,Houston,2099451 -------------------------------------------------------------------------------- /spec/fixtures/import_with_options/data.yaml: -------------------------------------------------------------------------------- 1 | version: fixture-import-options 2 | index: city-data 3 | api: cities 4 | options: 5 | columns: all 6 | limit_files: 1 7 | limit_rows: 3 8 | 9 | dictionary: 10 | state: USPS 11 | name: NAME 12 | population: POP10 13 | 14 | files: 15 | - name: cities4.csv 16 | add: 17 | year: 2010 18 | - name: more_cities.csv # this shouldn't get imported 19 | add: 20 | year: 1000 21 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_options/more_cities.csv: -------------------------------------------------------------------------------- 1 | USPS,GEOID,ANSICODE,NAME,POP10 2 | XX,0,0,YY,0 3 | -------------------------------------------------------------------------------- /spec/fixtures/invalid_utf8.csv: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/spec/fixtures/invalid_utf8.csv -------------------------------------------------------------------------------- /spec/fixtures/minimal/data.yaml: -------------------------------------------------------------------------------- 1 | # smallest possible data.yaml 2 | index: my-index 3 | -------------------------------------------------------------------------------- /spec/fixtures/nested2/data.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: Aug6-2015-08-10-23:48-0600 3 | api: fakeschool 4 | index: fakeschool-data 5 | unique: 6 | - id 7 | options: 8 | # columns: all 9 | limit_files: 1 10 | limit_rows: 100 11 | search: dictionary_only 12 | 13 | dictionary: 14 | id: 15 | source: UNITID 16 | type: integer 17 | description: Unit ID for institution 18 | ope8_id: 19 | source: OPEID 20 | type: integer 21 | description: 8-digit OPE ID for institution 22 | ope6_id: 23 | source: opeid6 24 | type: integer 25 | description: 6-digit OPE ID for institution 26 | school.name: 27 | source: INSTNM 28 | type: literal 29 | description: Institution name 30 | school.city: 31 | source: CITY_MAIN 32 | description: City 33 | school.state: 34 | source: STABBR_MAIN 35 | description: State postcode 36 | school.zip: 37 | source: ZIP_MAIN 38 | type: integer 39 | description: ZIP code 40 | 41 | files: 42 | - name: school2013.csv 43 | -------------------------------------------------------------------------------- /spec/fixtures/nested2/school2013.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59 12 | -------------------------------------------------------------------------------- /spec/fixtures/nested_data_type/data.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1.5.5 3 | api: schools 4 | index: school-data 5 | unique: 6 | - id 7 | options: 8 | limit_rows: 10 9 | 10 | dictionary: 11 | id: 12 | source: UNITID 13 | type: integer 14 | description: Unit ID for institution 15 | school.name: 16 | source: INSTNM 17 | type: autocomplete 18 | description: Institution name 19 | school.city: 20 | source: CITY 21 | type: autocomplete 22 | description: City 23 | school.degrees_awarded.predominant: 24 | source: PREDDEG 25 | type: integer 26 | description: |- 27 | Predominant 
undergraduate degree awarded 28 | 0 Not classified 29 | 1 Predominantly certificate-degree granting 30 | 2 Predominantly associate's-degree granting 31 | 3 Predominantly bachelor's-degree granting 32 | 4 Entirely graduate-degree granting 33 | programs.cip_4_digit.code: 34 | source: CIPCODE 35 | type: integer 36 | map: program 37 | description: Classification of Instructional Programs (CIP) code for the field of study 38 | programs.cip_4_digit.credential.level: 39 | source: CREDLEV 40 | type: integer 41 | map: program 42 | description: "Level of credential 43 | Credentials are categorized into the following levels: 44 | 1: Undergraduate Certificate or Diploma 45 | 2: Associate's Degree 46 | 3: Bachelor's Degree 47 | 4: Post-baccalaureate Certificate 48 | 5: Master's Degree 49 | 6: Doctoral Degree 50 | 7: First Professional Degree 51 | 8: Graduate/Professional Certificate" 52 | programs.cip_4_digit.earnings.median_earnings: 53 | source: P_MD_EARN_WNE 54 | map: program 55 | type: integer 56 | description: Median earnings of federally-aided completers in the earnings cohort 57 | 58 | shared_config: &shared_config 59 | contents: 60 | - earnings 61 | - completion 62 | - aid 63 | - repayment 64 | - student 65 | - cost 66 | - academics 67 | - admissions 68 | 69 | data_types: 70 | nested: 71 | - 2016.programs.cip_4_digit 72 | 73 | partial_map: 74 | program_debt: 75 | root: id 76 | path: programs.cip_4_digit 77 | link: programs.cip_4_digit.ope6_id 78 | 79 | files: 80 | - name: Most-Recent-Cohorts-All-Data-Elements.csv 81 | root: true 82 | only: 83 | - school 84 | nest: 85 | key: latest 86 | <<: *shared_config 87 | delta_only: 88 | - ope8_id 89 | - ope6_id 90 | - location 91 | - school 92 | - name: FieldOfStudyData1516_1617_PP.csv 93 | map: program_data 94 | nest: 95 | parent_missing: skip 96 | key: latest 97 | contents: 98 | - programs 99 | - name: MERGED2016_17_PP.csv 100 | nest: 101 | parent_missing: skip 102 | key: 2016 103 | <<: *shared_config -------------------------------------------------------------------------------- /spec/fixtures/nested_delta_files/data.yaml: -------------------------------------------------------------------------------- 1 | version: 1.5.5 2 | api: school 3 | index: fake-nested-delta 4 | unique: [id] 5 | 6 | dictionary: 7 | id: UNITID 8 | name: 9 | source: INSTNM 10 | type: literal 11 | city: CITY_MAIN 12 | state: STABBR_MAIN 13 | zipcode: ZIP_MAIN 14 | location.lat: LATITUDE_MAIN 15 | location.lon: LONGITUDE_MAIN 16 | under_investigation: 17 | source: HCM2 18 | type: integer 19 | description: under ED monitoring flag 20 | 21 | sat_average: SAT_AVG 22 | earnings.6_yrs_after_entry.median: 23 | source: earn_2002_p10 24 | description: Median earnings of students 25 | type: integer 26 | 27 | earnings.6_yrs_after_entry.percent_gt_25k: 28 | source: gt_25k_2006_p6 29 | description: Share of students earning over $25,000/year 30 | type: float 31 | 32 | files: 33 | - name: latest-school-data.csv 34 | only: [id, name, city, state, under_investigation] 35 | - name: latest-school-data.csv 36 | nest: 37 | key: latest 38 | contents: [earnings, sat_average] 39 | delta_only: [city, state, under_investigation] 40 | - name: school2013.csv 41 | nest: 42 | key: 2013 43 | contents: [earnings, sat_average] 44 | - name: school2012.csv 45 | nest: 46 | key: 2012 47 | contents: [earnings, sat_average] 48 | -------------------------------------------------------------------------------- /spec/fixtures/nested_delta_files/delta/latest-school-data_update1.csv:
-------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6,HCM2 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Alabama A & M University,1195,30000,0.53,1 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61,0 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50,1 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1900,0.1,0 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82,1 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06,0 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50,1 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59,0 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19,1 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59,0 12 | 11,Montgomery,NULL,1,36117,5,32.3643,-86.2957,Auburn University at Montgomery,940,49879,0.64,1 13 | -------------------------------------------------------------------------------- /spec/fixtures/nested_delta_files/latest-school-data.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6,HCM2 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53,0 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61,0 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50,0 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09,0 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82,0 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06,0 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50,0 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59,0 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19,0 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59,0 12 | 11,Montgomery,NULL,1,36117,5,32.3643,-86.2957,Auburn University at Montgomery,940,49879,0.64,0 13 | -------------------------------------------------------------------------------- /spec/fixtures/nested_delta_files/school2012.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6,HCM2 2 | 1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,461,35231,0.01,0 3 | 2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,986,34095,0.71,0 4 | 3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1094,42579,0.39,0 5 | 4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,854,37589,0.15,0 6 | 5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,650,13611,0.04,0 7 | 6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,797,36924,0.64,0 8 | 7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,994,31799,0.60,0 9 | 8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1420,30063,0.97,0 10 |
9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1292,42150,0.83,0 11 | 10,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,605,2608,0.92,0 12 | 11,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,2608,0.92,0 13 | -------------------------------------------------------------------------------- /spec/fixtures/nested_delta_files/school2013.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6,HCM2 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53,0 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61,0 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50,0 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09,0 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82,0 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06,0 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50,0 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59,0 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19,0 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59,0 12 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/data.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | api: school 3 | index: fake-nested 4 | unique: [id] 5 | 6 | dictionary: 7 | id: UNITID 8 | name: 9 | source: INSTNM 10 | type: literal 11 | city: CITY_MAIN 12 | state: STABBR_MAIN 13 | zipcode: ZIP_MAIN 14 | sat_average: SAT_AVG 15 | location.lat: LATITUDE_MAIN 16 | location.lon: LONGITUDE_MAIN 17 | 18 | earnings.6_yrs_after_entry.median: 19 | source: earn_2002_p10 20 | description: Median earnings of students 21 | type: integer 22 | 23 | earnings.6_yrs_after_entry.percent_gt_25k: 24 | source: gt_25k_2006_p6 25 | description: Share of students earning over $25,000/year 26 | type: float 27 | 28 | files: 29 | - name: school-data.csv 30 | only: [id, name, city, state] 31 | - name: school2013.csv 32 | nest: 33 | key: 2013 34 | contents: [earnings, sat_average] 35 | - name: school2012.csv 36 | nest: 37 | key: 2012 38 | contents: [earnings, sat_average] 39 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/school-data.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn 
University,486,44097,0.50 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59 12 | 11,Montgomery,NULL,1,36117,5,32.3643,-86.2957,Auburn University at Montgomery,940,49879,0.64 13 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/school2011.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1025,4048,0.92 3 | 2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,866,45556,0.34 4 | 3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,453,4675,0.71 5 | 4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,572,15466,0.34 6 | 5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1111,11266,0.86 7 | 6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,818,23357,0.58 8 | 7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1392,32584,0.39 9 | 8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,718,252,0.26 10 | 9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1297,36088,0.63 11 | 10,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,635,3259,0.70 12 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/school2012.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,461,35231,0.01 3 | 2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,986,34095,0.71 4 | 3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1094,42579,0.39 5 | 4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,854,37589,0.15 6 | 5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,650,13611,0.04 7 | 6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,797,36924,0.64 8 | 7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,994,31799,0.60 9 | 8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1420,30063,0.97 10 | 9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1292,42150,0.83 11 | 10,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,605,2608,0.92 12 | 11,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,2608,0.92 13 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/school2013.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19 11 | 
10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59 12 | -------------------------------------------------------------------------------- /spec/fixtures/numeric_data/data.yaml: -------------------------------------------------------------------------------- 1 | # cities100.txt 2 | # Test YAML file 3 | index: numeric-data 4 | api: cities 5 | 6 | dictionary: 7 | name: 8 | source: name 9 | type: string 10 | address: 11 | source: address 12 | type: string 13 | city: 14 | source: city 15 | type: string 16 | age: 17 | source: age 18 | type: integer 19 | height: 20 | source: height 21 | type: float 22 | -------------------------------------------------------------------------------- /spec/fixtures/sample-data/data.yaml: -------------------------------------------------------------------------------- 1 | version: cities100-2010 2 | # cities100.txt 3 | # National Places Gazetteer Files, from US Census 2010 4 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 5 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t') > result.txt 6 | # head -n 101 result.txt > cities100.txt 7 | # then converted to csv and removed " city" from after each city name 8 | index: city-data 9 | api: cities 10 | unique: ['name'] 11 | options: 12 | search: dictionary_only 13 | 14 | dictionary: 15 | id: 16 | source: GEOID 17 | description: > 18 | Geographic Identifier - fully concatenated geographic code (State FIPS and 19 | County FIPS). The Census Bureau and other state and federal agencies are 20 | responsible for assigning geographic identifiers, or GEOIDs, to geographic 21 | entities to facilitate the organization, presentation, and 22 | exchange of geographic and statistical data. GEOIDs are numeric codes that 23 | uniquely identify all administrative/legal and statistical geographic areas for 24 | which the Census Bureau tabulates data. From Alaska, the largest state, 25 | to the smallest census block in New York City, every geographic area 26 | has a unique GEOID. Data users rely on GEOIDs to join the appropriate 27 | demographic data from censuses and surveys, such as the 28 | American Community Survey (ACS), to various levels of geography for data 29 | analysis, interpretation and mapping. 30 | code: 31 | source: ANSICODE 32 | description: > 33 | American National Standards Institute codes (ANSI codes) 34 | are standardized numeric or alphabetic codes issued by the American 35 | National Standards Institute (ANSI) to ensure uniform identification of 36 | geographic entities through all federal government agencies. 37 | name: 38 | source: NAME 39 | description: The name of the city 40 | type: literal 41 | state: 42 | source: USPS 43 | description: Two letter state abbreviation 44 | population: 45 | source: POP10 46 | description: City population from 2010 Census data 47 | type: integer 48 | location.lat: INTPTLAT 49 | location.lon: INTPTLONG 50 | area.land: 51 | description: Land Area (square miles) 52 | source: ALAND_SQMI 53 | type: float 54 | area.water: 55 | description: Water Area (square miles) 56 | source: AWATER_SQMI 57 | type: float 58 | 59 | categories: 60 | general: 61 | title: General 62 | description: > 63 | general information about the city, including standard 64 | identifiers and actual census summary data about the population of the city. 65 | fields: [id, code, name, state, population] 66 | geographic: 67 | title: Geographic 68 | description: > 69 | Geographic characteristics of the area.
These are created for 70 | statistical purposes only. Depiction and designation for statistical 71 | purposes does not constitute a determination of jurisdictional authority 72 | or rights of ownership or entitlement. 73 | fields: [location, area.land, area.water] 74 | 75 | files: 76 | - name: cities100.csv 77 | -------------------------------------------------------------------------------- /spec/fixtures/school_names/data.yaml: -------------------------------------------------------------------------------- 1 | version: 0 2 | index: name-data 3 | api: names 4 | dictionary: 5 | id: ID 6 | school.name: 7 | source: NAME 8 | type: autocomplete 9 | school.state: STATE 10 | 11 | files: 12 | - name: school_names.csv 13 | -------------------------------------------------------------------------------- /spec/fixtures/school_names/school_names.csv: -------------------------------------------------------------------------------- 1 | ID,STATE,NAME 2 | 1,AL,Stillman College 3 | 2,NY,New York University 4 | 3,AZ,Arizona State University 5 | 4,CA,University of California-Berkeley 6 | 5,MA,Berklee College of Music 7 | 6,NY,Berk Trade and Business School 8 | 7,AZ,University of Phoenix-Online Campus 9 | 8,AZ,University of Phoenix-Phoenix Campus 10 | 9,AZ,Phoenix College 11 | -------------------------------------------------------------------------------- /spec/fixtures/schools/data.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: Aug6-2015-08-10-23:48-0600 3 | api: fakeschool 4 | index: fakeschool-data 5 | unique: 6 | - id 7 | options: 8 | limit_files: 1 9 | limit_rows: 100 10 | 11 | dictionary: 12 | id: 13 | source: UNITID 14 | type: integer 15 | description: Unit ID for institution 16 | school.name: 17 | source: INSTNM 18 | description: Institution name 19 | school.city: 20 | source: CITY_MAIN 21 | description: City 22 | school.state: 23 | source: STABBR_MAIN 24 | description: State postcode 25 | school.zip: 26 | source: ZIP_MAIN 27 | type: integer 28 | description: ZIP code 29 | completion.rate.lt_four_year: 30 | source: C150_L4_POOLED_SUPP 31 | type: float 32 | description: 150% completion rate for less-than-four-year institutions, pooled in two-year rolling averages and suppressed for small n size 33 | completion.rate.four_year: 34 | source: C150_4_POOLED_SUPP 35 | type: float 36 | description: 150% completion rate for four-year institutions, pooled in two-year rolling averages and suppressed for small n size 37 | completion.rate.overall: 38 | calculate: C150_L4_POOLED_SUPP or C150_4_POOLED_SUPP 39 | type: float 40 | description: 150% completion rate for the institution, independent of degree 41 | 42 | files: 43 | - name: schools.csv 44 | -------------------------------------------------------------------------------- /spec/fixtures/schools/schools.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,PREDDEG,UGDS,MENONLY,WOMENONLY,C150_4_POOLED_SUPP,C150_L4_POOLED_SUPP,earn_2002_p10,gt_25k_2006_p6 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Indigo Peak School,639,1,183504,0,0,NULL,0.16,3800,0.61 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Warm Thread Beauty College,1218,3,210739,0,0,0.62,NULL,13566,0.10 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Arrogant Abyss University,613,1,116967,0,0,NULL,0,1177,0.84 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Greasy Marsh 
Institute,590,1,81254,0,1,NULL,NULL,54146,0.49 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Harber Institute of Technology,1355,1,256538,1,0,0,0.91,38553,0.32 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Unsightly Mountain School of Fine Art,1201,1,139899,0,0,NULL,0.87,55899,0.95 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Auburn University College,740,3,165974,0,0,0.21,NULL,51608,0.73 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Birmingham School,1084,1,224554,0,0,NULL,0.70,29545,0.67 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Conn Institute of Technology,1171,4,87710,0,0,NULL,0.56,58307,0.63 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Hollow Resonance Institute,1058,2,97265,0,0,NULL,0.59,17880,0.36 12 | -------------------------------------------------------------------------------- /spec/fixtures/types/data.yaml: -------------------------------------------------------------------------------- 1 | 2 | version: 0 3 | # cities100.txt 4 | # National Places Gazetteer Files, from US Census 2010 5 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 6 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t') > result.txt 7 | # head -n 101 result.txt > cities100.txt 8 | # then converted to csv and removed " city" from after each city name 9 | dictionary: 10 | id: 11 | source: id 12 | type: literal 13 | city.name: 14 | source: name 15 | type: name 16 | city.state: state 17 | location.lat: lat 18 | location.lon: lon 19 | 20 | index: place-data 21 | api: places 22 | files: 23 | - name: places.csv 24 | -------------------------------------------------------------------------------- /spec/fixtures/types/places.csv: -------------------------------------------------------------------------------- 1 | id,state,name,lat,lon 2 | ca sf,CA,San Francisco,37.727239,-123.032229 3 | ny ny,NY,New York,40.664274,-73.938500 4 | la no,LA,New Orleans,30.068636,-89.939007 5 | -------------------------------------------------------------------------------- /spec/lib/data_magic/calculated_columns_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "calculated columns" do 5 | 6 | before :example do 7 | DataMagic.destroy 8 | ENV['DATA_PATH'] = data_path 9 | end 10 | after :example do 11 | DataMagic.destroy 12 | end 13 | 14 | describe "combine into float" do 15 | let(:data_path) { "./spec/fixtures/schools" } 16 | it "can combine two columns" do 17 | DataMagic.config = DataMagic::Config.new 18 | DataMagic.import_with_dictionary 19 | result = DataMagic.search({}, fields: ['id', 'completion.rate.overall']) 20 | results = result['results'].sort_by { |hash| hash['id'] } 21 | expect(results[0]).to eq('id' => 1, 'completion.rate.overall' => 0.16) 22 | expect(results[1]).to eq('id' => 2, 'completion.rate.overall' => 0.62) 23 | expect(results[2]).to eq('id' => 3, 'completion.rate.overall' => nil) 24 | expect(results[3]).to eq('id' => 4, 'completion.rate.overall' => nil) 25 | expect(results[4]).to eq('id' => 5, 'completion.rate.overall' => 0.91) 26 | end 27 | end 28 | 29 | describe "combine into boolean" do 30 | let(:data_path) { "./spec/fixtures/calculated_columns" } 31 | it "can combine multiple columns" do 32 | DataMagic.config = DataMagic::Config.new 33 | DataMagic.import_with_dictionary 34 | result = DataMagic.search({}, fields: %w(id summarybool)) 35 | results = result['results'].sort_by { |hash| hash['id'] } 36 | expect(results[0]).to eq('id' =>
1, 'summarybool' => true) 37 | expect(results[1]).to eq('id' => 2, 'summarybool' => false) 38 | expect(results[2]).to eq('id' => 3, 'summarybool' => true) 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /spec/lib/data_magic/config_field_types_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'DataMagic::Config #field_types' do 4 | let(:config) { DataMagic::Config.new(load_datayaml: false) } 5 | 6 | it "returns empty if dictionary is empty" do 7 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 8 | allow(config).to receive(:dictionary).and_return({}) 9 | expect(config.field_types).to eq({}) 10 | end 11 | 12 | context "when no type is given" do 13 | before do 14 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 15 | allow(config).to receive(:dictionary).and_return({ 16 | 'name' => {source:'NAME_COLUMN'} 17 | }) 18 | end 19 | 20 | it "defaults to string" do 21 | expect(config.field_types).to eq({ 22 | 'name' => 'string' 23 | }) 24 | end 25 | end 26 | 27 | it "supports integers" do 28 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 29 | allow(config).to receive(:dictionary).and_return( 30 | IndifferentHash.new count: 31 | {source:'COUNT_COLUMN', type: 'integer'} 32 | ) 33 | expect(config.field_types).to eq({'count' => 'integer'}) 34 | end 35 | 36 | context "with float type" do 37 | it "sets float mapping" do 38 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 39 | allow(config).to receive(:dictionary).and_return( 40 | IndifferentHash.new percent: 41 | {source:'PERCENT_COLUMN', type: 'float'} 42 | ) 43 | expect(config.field_types).to eq({'percent' => 'float'}) 44 | end 45 | 46 | it "can be excluded" do 47 | allow(config).to receive(:dictionary).and_return( 48 | IndifferentHash.new id: {source:'ID', type: 'integer'}, 49 | percent: {source:'PERCENT', type: 'float'} 50 | ) 51 | allow(config).to receive(:file_config).and_return([ 52 | IndifferentHash.new({ name:'one.csv', only: ['id'] }) 53 | ]) 54 | expect(config.field_types).to eq({'id' => 'integer'}) 55 | end 56 | 57 | it "can be nested" do 58 | allow(config).to receive(:dictionary).and_return( 59 | IndifferentHash.new id: {source:'ID', type: 'integer'}, 60 | percent: {source:'PERCENT', type: 'float'} 61 | ) 62 | allow(config).to receive(:file_config).and_return([ 63 | IndifferentHash.new({name:'one.csv', 64 | only: ['id']}), 65 | IndifferentHash.new({name:'two.csv', 66 | nest: {key: '2012', contents: ['percent']}}) 67 | ]) 68 | expect(config.field_types).to eq({ 69 | 'id' => 'integer', 70 | '2012.percent' => 'float' 71 | }) 72 | end 73 | end 74 | 75 | it "supports location.lat and location.lon fields" do 76 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 77 | allow(config).to receive(:dictionary).and_return( 78 | IndifferentHash.new 'location.lat': {source:'LAT_COLUMN', type: 'float'}, 79 | 'location.lon': {source:'LON_COLUMN', type: 'float'} 80 | ) 81 | expect(config.field_types).to eq( 82 | { 83 | 'location.lat'=>'float', 84 | 'location.lon'=>'float' 85 | } 86 | ) 87 | end 88 | end 89 | -------------------------------------------------------------------------------- /spec/lib/data_magic/config_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe DataMagic::Config do 4 | 
before(:all) do 5 | ENV['DATA_PATH'] = './spec/fixtures/import_with_dictionary' 6 | end 7 | 8 | it "detects data.yml files" do 9 | ENV['DATA_PATH'] = './spec/fixtures/cities_with_yml' 10 | config = DataMagic::Config.new 11 | expect(config.data["api"]).to eq("cities") 12 | end 13 | 14 | describe 'slugification' do 15 | it 'slugifies local paths' do 16 | config = DataMagic::Config.new 17 | slugified = config.clean_index('path/to/my_directory') 18 | expect(slugified).to eq('my-directory') 19 | end 20 | 21 | it 'slugifies s3 bucket names' do 22 | config = DataMagic::Config.new 23 | slugified = config.clean_index('s3://user:pass@my_bucket') 24 | expect(slugified).to eq('my-bucket') 25 | end 26 | end 27 | 28 | context "s3" do 29 | it "detects data.yaml" do 30 | ENV['DATA_PATH'] = 's3://mybucket' 31 | fake_s3 = class_spy("Fake Aws::S3::Client") 32 | fake_get_object_response = double( 33 | "S3 response", 34 | body: StringIO.new({ 'index' => 'fake-index' }.to_yaml), 35 | isOK: true, 36 | status: 200 37 | ) 38 | allow(fake_s3).to receive(:get_object) 39 | .with(bucket: 'mybucket', key: 'data.yaml', response_target: duck_type(:read)) 40 | .and_return(fake_get_object_response) 41 | config = DataMagic::Config.new(s3: fake_s3) 42 | expect(config.s3).to eq(fake_s3) 43 | expect(config.data["index"]).to eq("fake-index") 44 | end 45 | 46 | it "raises error if s3 errors" do 47 | ENV['DATA_PATH'] = 's3://mybucket' 48 | fake_s3 = class_spy("Fake Aws::S3::Client") 49 | 50 | allow(fake_s3).to receive(:get_object) 51 | .with(bucket: 'mybucket', key: 'data.yaml', response_target: duck_type(:read)) 52 | .and_raise(RuntimeError) 53 | expect { 54 | DataMagic::Config.new(s3: fake_s3) 55 | }.to raise_error(RuntimeError) 56 | end 57 | 58 | end 59 | 60 | context "create" do 61 | it "works with zero args" do 62 | expect(DataMagic::Config.new).to_not be_nil 63 | end 64 | it "can set s3 client" do 65 | # TODO: mock s3 66 | s3_client = "s3 client" 67 | config = DataMagic::Config.new(s3: s3_client) 68 | expect(config.s3).to eq(s3_client) 69 | end 70 | end 71 | 72 | context "when loaded" do 73 | let(:config) { DataMagic::Config.new } 74 | 75 | after do 76 | config.clear_all 77 | end 78 | 79 | context "#scoped_index_name" do 80 | it "includes environment prefix" do 81 | expect(config.scoped_index_name).to eq('test-city-data') 82 | end 83 | end 84 | 85 | it "has config data" do 86 | default_config = { 87 | "version" => "cities100-2010", 88 | "index" => "city-data", "api" => "cities", 89 | "files" => [{ "name" => "cities100.csv" }], 90 | "options" => {:search=>"dictionary_only"}, 91 | "unique" => ["name"], 92 | "data_path" => "./sample-data" 93 | } 94 | expect(config.data.keys).to include('dictionary') 95 | dictionary = config.data.delete 'dictionary' 96 | 97 | expect(dictionary.keys.sort).to eq %w(id code name state population 98 | location.lat location.lon land_area area.water).sort 99 | categories = config.data.delete 'categories' 100 | expect(categories.keys.sort).to eq %w(general general2 general3 general4 general5 geographic).sort 101 | expect(config.data).to eq(default_config) 102 | end 103 | 104 | it "has default page size" do 105 | expect(DataMagic::DEFAULT_PAGE_SIZE).to_not be_nil 106 | expect(config.page_size).to eq(DataMagic::DEFAULT_PAGE_SIZE) 107 | end 108 | 109 | describe "#update_indexed_config" do # rename ...
or do this in load_config or something 110 | context "after loading config" do 111 | let(:fixture_path) { "./spec/fixtures/import_with_dictionary" } 112 | before do 113 | config.load_datayaml(fixture_path) 114 | end 115 | it "should be true" do 116 | expect(config.update_indexed_config).to be true 117 | end 118 | it "should set new data_path" do 119 | expect(config.data_path).to eq(fixture_path) 120 | end 121 | 122 | it "twice should be false" do 123 | config.update_indexed_config 124 | expect(config.update_indexed_config).to be false 125 | end 126 | end 127 | end 128 | 129 | describe "when has a custom null_value" do 130 | it 'should have a default null value' do 131 | expect(config.null_value).to eq('NULL') 132 | end 133 | 134 | it 'should set null value field' do 135 | config.load_datayaml("./spec/fixtures/import_with_null_value") 136 | expect(config.null_value).to eq('abc123') 137 | end 138 | end 139 | end 140 | 141 | context ".calculated_field_list" do 142 | let(:config) { DataMagic::Config.new(load_datayaml: false) } 143 | it "finds fields with 'calculate' property" do 144 | allow(config).to receive(:dictionary).and_return( 145 | { 146 | one: { 147 | source: 'column1', 148 | type: 'float' 149 | }, 150 | two: { 151 | source: 'column2', 152 | type: 'float' 153 | }, 154 | all: { 155 | calculate: 'column1 or column2', 156 | type: 'float', 157 | description: 'something' 158 | } 159 | } 160 | ) 161 | expect(config.calculated_field_list).to eq(['all']) 162 | end 163 | end 164 | 165 | context ".only_field_list" do 166 | let(:config) { DataMagic::Config.new(load_datayaml: false) } 167 | let(:simple_fields) do 168 | { 'one' => 'column1', 'two' => 'column2', 'three' => 'column3' } 169 | end 170 | let(:fields_with_dots) do 171 | { 'one' => 'column1', 'two.a' => 'column2a', 'two.b' => 'column2b' } 172 | end 173 | 174 | it "selects a subset" do 175 | expect(config.only_field_list(%w(one two), simple_fields)).to eq( 176 | 'one' => 'column1', 'two' => 'column2' 177 | ) 178 | end 179 | 180 | it "selects fields with dots" do 181 | expect(config.only_field_list(%w(two), fields_with_dots)).to eq( 182 | 'two.a' => 'column2a', 'two.b' => 'column2b' 183 | ) 184 | end 185 | end 186 | end 187 | -------------------------------------------------------------------------------- /spec/lib/data_magic/create_index_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic #init" do 5 | before (:all) do 6 | ENV['DATA_PATH'] = './spec/fixtures/import_with_dictionary' 7 | end 8 | 9 | after(:each) do 10 | DataMagic.destroy 11 | end 12 | 13 | context "with no options" do 14 | it "creates index only once" do 15 | expect(DataMagic).to receive(:create_index).once 16 | DataMagic.init 17 | end 18 | 19 | it "creates index" do 20 | DataMagic.init 21 | expect(DataMagic.config.index_exists?).to be true 22 | end 23 | 24 | it "does not re-create index with subsequent call to #import_with_dictionary" do 25 | expect(DataMagic).to receive(:create_index).once 26 | DataMagic.init 27 | DataMagic.import_with_dictionary 28 | end 29 | end 30 | 31 | 32 | context "with load_now: false" do 33 | it "does not call #create_index" do 34 | expect(DataMagic).not_to receive(:create_index) 35 | DataMagic.init(load_now: false) 36 | end 37 | 38 | it "does not create index" do 39 | DataMagic.init(load_now: false) 40 | expect(DataMagic.config.index_exists?).to be false 41 | end 42 | 43 | it "creates index with subsequent call to 
#import_with_dictionary" do 44 | DataMagic.init(load_now: false) 45 | DataMagic.import_with_dictionary 46 | expect(DataMagic.config.index_exists?).to be true 47 | end 48 | 49 | it "creates index with subsequent call to #import_csv" do 50 | ENV['DATA_PATH'] = './spec/fixtures/minimal' 51 | DataMagic.init(load_now: false) 52 | data_str = <<-eos 53 | a,b 54 | 1,2 55 | 3,4 56 | eos 57 | data = StringIO.new(data_str) 58 | DataMagic.import_csv(data) 59 | expect(DataMagic.config.index_exists?).to be true 60 | end 61 | end 62 | end -------------------------------------------------------------------------------- /spec/lib/data_magic/example_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe Example do 4 | let(:hash) do 5 | { name: 'foo', 6 | description: 'interesting thing', 7 | params: 'a=1&b=something', 8 | endpoint: 'api' } 9 | end 10 | subject(:e) { Example.new(hash) } 11 | 12 | it "has a name" do 13 | expect(e.name).to eq(hash[:name]) 14 | end 15 | it "has a description" do 16 | expect(e.description).to eq(hash[:description]) 17 | end 18 | it "has params" do 19 | expect(e.params).to eq(hash[:params]) 20 | end 21 | it "has an endpoint" do 22 | expect(e.endpoint).to eq(hash[:endpoint]) 23 | end 24 | 25 | it "has a link" do 26 | expect(e.link).to eq("/v1/#{e.endpoint}?#{e.params}") 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /spec/lib/data_magic/import_csv_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic #import_csv" do 5 | before do 6 | ENV['DATA_PATH'] = './spec/fixtures/minimal' 7 | DataMagic.init(load_now: false) 8 | end 9 | after do 10 | DataMagic.destroy 11 | #expect(DataMagic.client.indices.get(index: '_all')).to be_empty 12 | end 13 | 14 | it "throws errors for bad format" do 15 | data = StringIO.new("not csv format") 16 | expect{DataMagic.import_csv(data)}.to raise_error(DataMagic::InvalidData) 17 | end 18 | 19 | it "reads file and reports number of rows and headers" do 20 | data_str = <<-eos 21 | a,b 22 | 1,2 23 | 3,4 24 | eos 25 | data = StringIO.new(data_str) 26 | num_rows, fields = DataMagic.import_csv(data) 27 | expect(num_rows).to be(2) 28 | expect(fields).to eq(['a', 'b']) 29 | end 30 | 31 | end 32 | -------------------------------------------------------------------------------- /spec/lib/data_magic/import_with_delta_file_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "delta update" do 5 | 6 | before :example do 7 | DataMagic.destroy 8 | ENV['DATA_PATH'] = './spec/fixtures/nested_delta_files' 9 | DataMagic.config = DataMagic::Config.new 10 | DataMagic.import_with_dictionary 11 | DataMagic.import_with_delta({delta_original: 'latest-school-data.csv', delta_update: 'latest-school-data_update1.csv'}) 12 | end 13 | after :example do 14 | DataMagic.destroy 15 | end 16 | let(:query) { {} } 17 | let(:sort) { nil } 18 | let(:result) { DataMagic.search(query, sort: sort) } 19 | let(:first) { result['results'].first } 20 | let(:id_one) { result['results'].find { |item| item['id'] == '1' } } 21 | let(:total) { result['metadata']['total'] } 22 | 23 | it "updates one document per unique id" do 24 | expect(total).to eq(11) 25 | end 26 | 27 | it "updates root document :delta_only fields" do 28 | expect(id_one['id']).to eq('1')
29 | expect(id_one['under_investigation']).to eq(1) 30 | end 31 | 32 | it "does not update root document fields not specified in :delta_only" do 33 | expect(id_one['name']).to eq('Reichert University') 34 | end 35 | 36 | it "updates nested documents per unique id" do 37 | expect(id_one['latest']).to_not be_nil 38 | expect(id_one['latest']['earnings']['6_yrs_after_entry']['median']).to eq(30000) 39 | end 40 | 41 | it "does not update nested documents in non-delta files" do 42 | expect(id_one['id']).to eq('1') 43 | expect(id_one['2013']).to_not be_nil 44 | expect(id_one['2013']['earnings']['6_yrs_after_entry']['median']).to eq(26318) 45 | end 46 | 47 | context "can import a subset of fields" do 48 | context "and when searching for a field value" do 49 | let(:query) { {zipcode: "35762"} } 50 | it "and doesn't find column" do 51 | expect(total).to eq(0) 52 | end 53 | end 54 | it "and doesn't include extra field" do 55 | expect(first['zipcode']).to be(nil) 56 | end 57 | end 58 | 59 | context "when searching on a nested field" do 60 | let(:query) { { 'latest.earnings.6_yrs_after_entry.median' => 30000 } } 61 | it "can find the correct results" do 62 | expect(total).to eq(1) 63 | expect(first['latest']['earnings']['6_yrs_after_entry']).to eq({"percent_gt_25k"=>0.53, "median"=>30000}) 64 | end 65 | end 66 | 67 | context "when sorting by a nested field" do 68 | let(:sort) { 'latest.earnings.6_yrs_after_entry.median' } 69 | it "can find the right first result" do 70 | expect(total).to eq(11) 71 | expect(first['latest']['earnings']['6_yrs_after_entry']).to eq({"percent_gt_25k"=>0.1, "median"=>1900}) 72 | end 73 | end 74 | end 75 | 76 | 77 | -------------------------------------------------------------------------------- /spec/lib/data_magic/import_with_nested_files_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "unique key(s)" do 5 | 6 | before :example do 7 | DataMagic.destroy 8 | ENV['DATA_PATH'] = './spec/fixtures/nested_files' 9 | DataMagic.config = DataMagic::Config.new 10 | DataMagic.import_with_dictionary 11 | end 12 | after :example do 13 | DataMagic.destroy 14 | end 15 | let(:query) { {} } 16 | let(:sort) { nil } 17 | let(:result) { DataMagic.search(query, sort: sort) } 18 | let(:first) { result['results'].first } 19 | let(:id_one) { result['results'].find { |item| item['id'] == '1' } } 20 | let(:total) { result['metadata']['total'] } 21 | 22 | it "creates one document per unique id" do 23 | expect(total).to eq(11) 24 | end 25 | 26 | it "nests documents per unique id" do 27 | expect(id_one['id']).to eq('1') 28 | expect(id_one['2013']).to_not be_nil 29 | end 30 | 31 | it "root document contains special 'only' fields" do 32 | expect(id_one['id']).to eq('1') 33 | expect(id_one['name']).to eq('Reichert University') 34 | expect(id_one['city']).to eq('Normal') 35 | expect(id_one['state']).to eq('AL') 36 | end 37 | 38 | context "can import a subset of fields" do 39 | context "and when searching for a field value" do 40 | let(:query) { {zipcode: "35762"} } 41 | it "and doesn't find column" do 42 | expect(total).to eq(0) 43 | end 44 | end 45 | it "and doesn't include extra field" do 46 | expect(first['zipcode']).to be(nil) 47 | end 48 | end 49 | 50 | context "when searching on a nested field" do 51 | let(:query) { { '2013.earnings.6_yrs_after_entry.median' => 26318 } } 52 | it "can find the correct results" do 53 | expect(total).to eq(1) 54 | 
expect(first['2013']['earnings']['6_yrs_after_entry']).to eq({"percent_gt_25k"=>0.53, "median"=>26318}) 55 | end 56 | end 57 | 58 | context "when sorting by a nested field" do 59 | let(:sort) { '2013.earnings.6_yrs_after_entry.median' } 60 | it "can find the right first result" do 61 | expect(total).to eq(11) 62 | expect(first['2013']['earnings']['6_yrs_after_entry']).to eq({"percent_gt_25k"=>0.09, "median"=>1836}) 63 | end 64 | end 65 | end 66 | -------------------------------------------------------------------------------- /spec/lib/data_magic/import_without_data_yaml_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic #import_without_data_yaml" do 5 | describe "without ALLOW_MISSING_YML" do 6 | it "not found locally raises error" do 7 | ENV['DATA_PATH'] = './spec/fixtures/cities_without_yml' 8 | expect { 9 | DataMagic.init(load_now: true) 10 | }.to raise_error(IOError, "No data.y?ml found at ./spec/fixtures/cities_without_yml. Did you mean to define ALLOW_MISSING_YML environment variable?") 11 | end 12 | it "not found on s3 raises error" do 13 | ENV['DATA_PATH'] = 's3://mybucket' 14 | fake_s3 = Aws::S3::Client.new(stub_responses: true) 15 | fake_s3.stub_responses(:get_object, Aws::S3::Errors::NoSuchKey.new(Seahorse::Client::RequestContext, 'Fake Error')) 16 | expect { 17 | config = DataMagic::Config.new(s3: fake_s3) 18 | }.to raise_error(IOError, "No data.y?ml found at s3://mybucket. Did you mean to define ALLOW_MISSING_YML environment variable?") 19 | end 20 | 21 | end 22 | describe "with ALLOW_MISSING_YML" do 23 | let (:expected) do 24 | { 25 | "metadata" => { 26 | "total" => 1, 27 | "page" => 0, 28 | "per_page" => DataMagic::DEFAULT_PAGE_SIZE 29 | }, 30 | "results" => [] 31 | } 32 | end 33 | 34 | before(:all) do 35 | DataMagic.destroy 36 | ENV['ALLOW_MISSING_YML'] = 'allow' 37 | ENV['DATA_PATH'] = './spec/fixtures/cities_without_yml' 38 | DataMagic.init(load_now: true) 39 | end 40 | after(:all) do 41 | DataMagic.destroy 42 | ENV['ALLOW_MISSING_YML'] = '' 43 | end 44 | 45 | it "can get list of imported csv files" do 46 | file_list = [ 47 | "./spec/fixtures/cities_without_yml/cities50.csv", 48 | "./spec/fixtures/cities_without_yml/cities51-100.csv", 49 | "./spec/fixtures/cities_without_yml/more.csv", 50 | ] 51 | expect(DataMagic.config.files.sort).to eq(file_list) 52 | end 53 | 54 | it "can get index name from api endpoint" do 55 | expect(DataMagic.config.find_index_for('cities-without-yml')).to eq('cities-without-yml') 56 | end 57 | 58 | it "indexes files with yaml mapping" do 59 | result = DataMagic.search({NAME: "Chicago"}, api: 'cities-without-yml') 60 | expected["results"] = [ 61 | { 62 | "USPS"=>"IL", 63 | "GEOID"=>"1714000", 64 | "ANSICODE"=>"00428803", 65 | "NAME"=>"Chicago", 66 | "LSAD"=>"25", 67 | "FUNCSTAT"=>"A", 68 | "POP10"=>"2695598", 69 | "HU10"=>"1194337", 70 | "ALAND"=>"589571105", 71 | "AWATER"=>"16781658", 72 | "ALAND_SQMI"=>"227.635", 73 | "AWATER_SQMI"=>"6.479", 74 | "INTPTLAT"=>"41.837551", 75 | "INTPTLONG"=>"-87.681844", 76 | } 77 | ] 78 | expect(result).to eq(expected) 79 | end 80 | end 81 | end 82 | -------------------------------------------------------------------------------- /spec/lib/data_magic/index/document_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe DataMagic::Index::Document do 5 | before do 6 | allow(DataMagic).to 
receive(:config).and_return(config) 7 | end 8 | 9 | let(:document) { DataMagic::Index::Document.new(data) } 10 | let(:config) { DataMagic::Config.new() } 11 | let(:data) { {} } 12 | 13 | context 'when configured without any unique keys' do 14 | before do 15 | config.data['unique'] = [] 16 | end 17 | 18 | it 'id should be nil' do 19 | expect(document.id).to be(nil) 20 | end 21 | 22 | it 'id should not be empty though' do 23 | expect(document.id_empty?).to be_falsey 24 | end 25 | end 26 | 27 | context 'when configured with the default keys' do 28 | context 'and there is no data' do 29 | it 'id should be an empty string' do 30 | expect(document.id).to eq('') 31 | end 32 | 33 | it 'id should be considered empty' do 34 | expect(document.id_empty?).to be_truthy 35 | end 36 | end 37 | 38 | context 'when there is data' do 39 | let(:data) { 40 | {"name" => "foo", "state"=>"MA"} 41 | } 42 | 43 | it 'id should be the value for the name key' do 44 | expect(document.id).to eq('foo') 45 | end 46 | 47 | it 'id should not be considered empty' do 48 | expect(document.id_empty?).to be_falsey 49 | end 50 | end 51 | end 52 | 53 | context 'with custom id configuration' do 54 | let(:data) { 55 | {"name" => "foo", "state"=>"MA"} 56 | } 57 | 58 | before do 59 | config.data['unique'] = ['name', 'state'] 60 | end 61 | 62 | it 'id should build the right id for the data' do 63 | expect(document.id).to eq('foo:MA') 64 | end 65 | 66 | it 'id should not be considered empty' do 67 | expect(document.id_empty?).to be_falsey 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /spec/lib/data_magic/index/event_logger_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe DataMagic::Index::EventLogger do 5 | let(:event_logger) { 6 | l = DataMagic::Index::EventLogger.new 7 | allow(l).to receive(:logger).and_return(logger) 8 | l 9 | } 10 | 11 | let(:logger) { double('logger') } 12 | 13 | context 'when triggering an event with only a message argument' do 14 | it 'logs the message with the right level' do 15 | expect(logger).to receive(:info).with('hey!') 16 | event_logger.trigger('info', 'hey!') 17 | 18 | expect(logger).to receive(:debug).with('what happened?') 19 | event_logger.trigger('debug', 'what happened?') 20 | 21 | expect(logger).to receive(:warn).with('dude? everything ok?') 22 | event_logger.trigger('warn', 'dude? 
everything ok?') 23 | 24 | expect(logger).to receive(:error).with('FIRE IN THE HOLE!') 25 | event_logger.trigger('error', 'FIRE IN THE HOLE!') 26 | end 27 | end 28 | 29 | context 'when triggering an event with a message and an object' do 30 | it 'logs as a key value pair with an inspection of the object' do 31 | expect(logger).to receive(:info).with("foo: {:wild=>\"bar\"}") 32 | event_logger.trigger('info', 'foo', {wild: 'bar'}) 33 | end 34 | 35 | it 'will shorten the object inspection when provided a limit' do 36 | expect(logger).to receive(:warn).with("foo: {:wild") 37 | event_logger.trigger('warn', 'foo', {wild: 'bar'}, 5) 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /spec/lib/data_magic/index/importer_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic::Index::Importer" do 5 | before do 6 | ENV['DATA_PATH'] = './spec/fixtures/minimal' 7 | DataMagic.init(load_now: false) 8 | end 9 | after do 10 | DataMagic.destroy 11 | end 12 | 13 | it "indexes in parallel based on NPROCS" do 14 | stub_const('ENV', { 'NPROCS' => '2' }) 15 | 16 | data_str = <<-eos 17 | a,b 18 | 1,2 19 | 3,4 20 | eos 21 | data = StringIO.new(data_str) 22 | num_rows, fields = DataMagic.import_csv(data) 23 | expect(num_rows).to be(2) 24 | expect(fields).to eq(['a', 'b']) 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /spec/lib/data_magic/index/repository_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe DataMagic::Index::Repository do 5 | let(:repository) { DataMagic::Index::Repository.new(super_client, document) } 6 | 7 | let(:super_client) { double('super client', {index_name: 'index', nested_partial?: false}) } 8 | let(:document) { double('document', {id: 'id', data: 'data'}) } 9 | 10 | context 'when super client is creating' do 11 | before do 12 | allow(super_client).to receive(:creating?).and_return(true) 13 | allow(super_client).to receive(:index) 14 | end 15 | 16 | it '#save creates an index' do 17 | expect(super_client).to receive(:index).with({ 18 | index: 'index', 19 | id: 'id', 20 | type: 'document', 21 | body: 'data', 22 | timeout: '5m' 23 | }) 24 | repository.save 25 | end 26 | 27 | it '#save will not be skipped when successful' do 28 | repository.save 29 | expect(repository.skipped?).to be_falsey 30 | end 31 | end 32 | 33 | context 'when super client is not creating' do 34 | before do 35 | allow(super_client).to receive(:creating?).and_return(false) 36 | allow(super_client).to receive(:allow_skips?) 
37 | allow(super_client).to receive(:update) 38 | end 39 | 40 | it '#save updates an index' do 41 | expect(super_client).to receive(:update).with({ 42 | index: 'index', 43 | id: 'id', 44 | type: 'document', 45 | body: {doc: 'data'}, 46 | timeout: '5m' 47 | }) 48 | repository.save 49 | end 50 | 51 | it '#save will not be skipped when successful' do 52 | repository.save 53 | expect(repository.skipped?).to be_falsey 54 | end 55 | end 56 | 57 | context 'when super client is not creating, not skipping and an error is raised' do 58 | before do 59 | allow(super_client).to receive(:creating?).and_return(false) 60 | allow(super_client).to receive(:allow_skips?).and_return(false) 61 | end 62 | 63 | it '#save raises an error' do 64 | allow(super_client).to receive(:update).and_raise(Elasticsearch::Transport::Transport::Errors::NotFound) 65 | expect { 66 | repository.save 67 | }.to raise_error(Elasticsearch::Transport::Transport::Errors::NotFound) 68 | end 69 | end 70 | 71 | context 'when super client is not creating, skipping and an error is raised' do 72 | before do 73 | allow(super_client).to receive(:creating?).and_return(false) 74 | allow(super_client).to receive(:allow_skips?).and_return(true) 75 | end 76 | 77 | it '#save marks the repository as skipped' do 78 | allow(super_client).to receive(:update).and_raise(Elasticsearch::Transport::Transport::Errors::NotFound) 79 | expect { 80 | repository.save 81 | }.not_to raise_error 82 | expect(repository.skipped?).to eq(true) 83 | end 84 | end 85 | end 86 | -------------------------------------------------------------------------------- /spec/lib/data_magic/name_type_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic name types" do 5 | 6 | before :example do 7 | DataMagic.destroy 8 | ENV['DATA_PATH'] = './spec/fixtures/types' 9 | DataMagic.init(load_now: true) 10 | end 11 | after :example do 12 | DataMagic.destroy 13 | end 14 | 15 | it "can search for one word" do 16 | response = DataMagic.search({'city.name' => 'New'}, fields:['city.name']) 17 | results = response['results'].sort {|a,b| a['city.name'] <=> b['city.name']} 18 | expect(results).to eq( 19 | [{"city.name"=>"New Orleans"}, {"city.name"=>"New York"}]) 20 | end 21 | 22 | it "can search for multiple words" do 23 | response = DataMagic.search({'city.name' => 'New York'}, fields:['city.name']) 24 | results = response['results'] 25 | expect(results).to eq( 26 | [{"city.name"=>"New York"}]) 27 | end 28 | 29 | it "can search for partial words" do 30 | response = DataMagic.search({'city.name' => 'S Fran'}, fields:['city.name']) 31 | results = response['results'] 32 | expect(results).to eq( 33 | [{"city.name"=>"San Francisco"}]) 34 | end 35 | 36 | it "is not case sensitive" do 37 | response = DataMagic.search({'city.name' => 'nEW'}, fields:['city.name']) 38 | results = response['results'].sort {|a,b| a['city.name'] <=> b['city.name']} 39 | expect(results).to eq( 40 | [{"city.name"=>"New Orleans"}, {"city.name"=>"New York"}]) 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /spec/lib/data_magic/nested_and_non_nested_combos_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | require 'hashie' 4 | 5 | describe DataMagic::QueryBuilder do 6 | 7 | before :example do 8 | DataMagic.destroy 9 | DataMagic.client 10 | ENV['DATA_PATH'] = 
'./spec/fixtures/nested_data_type' 11 | DataMagic.config = DataMagic::Config.new 12 | end 13 | 14 | after :example do 15 | DataMagic.destroy 16 | end 17 | 18 | RSpec.configure do |c| 19 | c.alias_it_should_behave_like_to :it_correctly, 'correctly:' 20 | end 21 | 22 | let(:nested_meta) { { post_es_response: {}, from: 0, size: 20, _source: false } } 23 | let(:options) { {} } 24 | let(:query_hash) { DataMagic::QueryBuilder.from_params(subject, options, DataMagic.config) } 25 | let(:non_nested_range) {{ 26 | or: [{ 27 | range: { 28 | "school.degrees_awarded.predominant": { 29 | "gte": 1, 30 | "lte": 3 31 | } 32 | } 33 | }] 34 | }} 35 | let(:non_nested_autocomplete) {{ 36 | common: { 37 | "school.name" => { 38 | query: "arizona", 39 | cutoff_frequency: 0.001, 40 | low_freq_operator: "and" 41 | } 42 | } 43 | }} 44 | let(:nested_match) {{ 45 | nested: { 46 | inner_hits: { 47 | size: DataMagic::MAX_NESTED_RESULT 48 | }, 49 | path: "2016.programs.cip_4_digit", 50 | query: { 51 | bool: { 52 | filter: [{ 53 | bool: { 54 | must: [{ 55 | match: { "2016.programs.cip_4_digit.code" => "1312" } 56 | }] 57 | } 58 | }] 59 | } 60 | } 61 | } 62 | }} 63 | 64 | shared_examples "builds a query" do 65 | it "with a query section" do 66 | expect(query_hash[:query]).to eql expected_query 67 | end 68 | it "with query metadata" do 69 | expect(query_hash.reject { |k, _| k == :query }).to eql nested_meta 70 | end 71 | end 72 | 73 | describe "appropriately combines queries for nested and non-nested datatypes" do 74 | context "both queries are match queries" do 75 | subject {{ 76 | "2016.programs.cip_4_digit.code" => "1312", 77 | "id" => "243744" 78 | }} 79 | 80 | let(:expected_query) {{ 81 | bool: { 82 | must: { match: { "id" => "243744" }}, 83 | filter: nested_match 84 | } 85 | }} 86 | 87 | it_correctly "builds a query" 88 | end 89 | 90 | context "non-nested query is an autocomplete query and nested query is a match query" do 91 | subject {{ 92 | "2016.programs.cip_4_digit.code" => "1312", 93 | "school.name" => "arizona" 94 | }} 95 | 96 | let(:expected_query) {{ 97 | bool: { 98 | must: non_nested_autocomplete, 99 | filter: nested_match 100 | } 101 | }} 102 | 103 | it_correctly "builds a query" 104 | 105 | 106 | end 107 | 108 | context "non-nested query is a range query and nested query is a match query" do 109 | subject {{ 110 | "2016.programs.cip_4_digit.code" => "1312", 111 | "school.degrees_awarded.predominant__range" => "1..3" 112 | }} 113 | 114 | let(:expected_query) {{ 115 | bool: { 116 | filter: [ 117 | non_nested_range, 118 | nested_match 119 | ] 120 | } 121 | }} 122 | 123 | it_correctly "builds a query" 124 | end 125 | 126 | context "query includes non-nested range query and autocomplete query and nested query is a match query" do 127 | subject {{ 128 | "2016.programs.cip_4_digit.code" => "1312", 129 | "school.degrees_awarded.predominant__range" => "1..3", 130 | "school.name" => "arizona" 131 | }} 132 | 133 | let(:expected_query) {{ 134 | bool: { 135 | filter: [ 136 | non_nested_range, 137 | nested_match, 138 | { 139 | bool: { 140 | must: non_nested_autocomplete 141 | } 142 | } 143 | ] 144 | } 145 | }} 146 | 147 | it_correctly "builds a query" 148 | end 149 | end 150 | end -------------------------------------------------------------------------------- /spec/lib/data_magic/search_name_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | require 'csv' 4 | 5 | describe "DataMagic intuitive search" do 6 | 7 | before
:example do 8 | DataMagic.destroy 9 | ENV['DATA_PATH'] = './spec/fixtures/school_names' 10 | DataMagic.init(load_now: true) 11 | end 12 | after :example do 13 | DataMagic.destroy 14 | end 15 | 16 | RSpec.configure do |c| 17 | c.alias_it_should_behave_like_to :it_correctly, 'correctly:' 18 | end 19 | 20 | let(:expected_meta) {{"metadata"=>{"total"=>1, "page"=>0, "per_page"=>20}}} 21 | let(:expected_match) { "" } 22 | let(:response) { DataMagic.search( 23 | {'school.name' => subject}, fields:['school.name']) } 24 | 25 | context "full request" do 26 | let(:response) { DataMagic.search({id: 1}) } 27 | let(:expected_match) { [{"id"=>"1", "school"=>{"state"=>"AL", "name"=>"Stillman College"}}]} 28 | it "provides expected document" do 29 | expect(response['results']).to eql expected_match 30 | end 31 | end 32 | 33 | context "sort" do 34 | shared_examples "returns" do 35 | it "sorted results " do 36 | expect(response['results'].map { |i| i['school.name'] }) 37 | .to eql expected_match 38 | end 39 | end 40 | 41 | context "with list of names" do 42 | let(:response) { DataMagic.search({}, fields:['school.name'], 43 | sort: 'school.name') } 44 | # fields:['name'], 45 | let(:expected_match) { 46 | csv_path = File.expand_path("../../fixtures/school_names/school_names.csv", __dir__) 47 | data = CSV.read(csv_path).slice(1..-1) 48 | data.map { |row| row[2] } 49 | .sort.slice(0,20) 50 | } 51 | it_correctly "returns" 52 | end 53 | 54 | end 55 | 56 | context "basic search" do 57 | shared_examples "finds" do 58 | it "correct results " do 59 | expect(response['results'] 60 | .map { |i| i['school.name'] } 61 | .sort ) 62 | .to eql expected_match 63 | end 64 | it "correct metadata" do 65 | expect(response.reject { |k, _| k == 'results' }).to eql expected_meta 66 | end 67 | end 68 | 69 | context "for exact match" do 70 | subject { 'New York University' } 71 | let(:expected_match) { ['New York University'] } 72 | it_correctly "finds" 73 | end 74 | context "for exact match (case insensitive)" do 75 | subject { 'new YORK UniverSity' } 76 | let(:expected_match) { ['New York University'] } 77 | it_correctly "finds" 78 | end 79 | 80 | context "for exact match (case insensitive)" do 81 | subject { 'new YORK UniverSity' } 82 | let(:expected_match) { ['New York University'] } 83 | it_correctly "finds" 84 | end 85 | 86 | context "by prefix" do 87 | subject { 'Still' } 88 | let(:expected_match) { ['Stillman College'] } 89 | it_correctly "finds" 90 | end 91 | 92 | context "by prefix (case insensitive)" do 93 | subject { 'still' } 94 | let(:expected_match) { ['Stillman College'] } 95 | it_correctly "finds" 96 | end 97 | 98 | context "by prefix in the middle of the name" do 99 | subject { 'Phoenix' } 100 | let(:expected_meta) {{"metadata"=>{"total"=>3, "page"=>0, "per_page"=>20}}} 101 | let(:expected_match) { ['Phoenix College', 102 | 'University of Phoenix-Online Campus', 103 | "University of Phoenix-Phoenix Campus"] } 104 | it_correctly "finds" 105 | end 106 | 107 | context "with words in the wrong order" do 108 | subject { 'University New York' } 109 | let(:expected_match) { ['New York University'] } 110 | it_correctly "finds" 111 | end 112 | 113 | context "partial word after dash" do 114 | subject { 'berk' } 115 | let(:expected_meta) {{"metadata"=>{"total"=>3, "page"=>0, "per_page"=>20}}} 116 | let(:expected_match) { ['Berk Trade and Business School', 117 | 'Berklee College of Music', 118 | 'University of California-Berkeley'] } 119 | it_correctly "finds" 120 | end 121 | 122 | context "words separated by dash" do 123 | 
subject { 'phoenix online' } 124 | let(:expected_match) { ['University of Phoenix-Online Campus'] } 125 | it_correctly "finds" 126 | end 127 | end 128 | # TODO 129 | # "pheonix" (a misspelling) should probably work 130 | # "phoenix college" should probably also return "university of phoenix" --- since college is a synonym for university 131 | 132 | end 133 | -------------------------------------------------------------------------------- /spec/lib/data_magic_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | require 'fixtures/data.rb' 4 | 5 | describe DataMagic do 6 | it "cleans up after itself" do 7 | DataMagic.init(load_now: true) 8 | DataMagic.destroy 9 | DataMagic.logger.info "just destroyed" 10 | #expect(DataMagic.client.indices.get(index: '_all')).to be_empty 11 | end 12 | 13 | describe '.es_field_types' do 14 | it 'returns the given fields with their specified type' do 15 | expect(described_class.es_field_types({ 'state' => 'string', land_area: 'string' })) 16 | .to eq("state" => { :type => "string" }, 17 | :land_area => { :type => "string" }) 18 | end 19 | 20 | context 'with custom type "literal"' do 21 | it 'returns string type with :index of "not_analyzed"' do 22 | expect(described_class.es_field_types({ 'state' => 'string', 'name' => 'literal' })) 23 | .to eq({"state"=>{:type=>"string"}, "name"=>{:type=>"string", :index=>"not_analyzed"}}) 24 | end 25 | end 26 | 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /spec/lib/expression/eval_spec.rb: -------------------------------------------------------------------------------- 1 | require 'expression/parser' 2 | require 'expression/eval' 3 | 4 | describe Expression::Eval do 5 | 6 | let(:parser) { Expression::Parser.new } 7 | let(:eval) { Expression::Eval.new } 8 | let(:values) {{ 'f' => 0, 't' => 1 }} 9 | 10 | it "simple 'or'" do 11 | expect( 12 | eval.apply(parser.parse('t or f'), variables: values) 13 | ).to eq(1) 14 | end 15 | 16 | describe "simple 'and'" do 17 | it "true and false" do 18 | expect( 19 | eval.apply(parser.parse('t and f'), variables: values) 20 | ).to eq(0) 21 | end 22 | 23 | it "false and true" do 24 | expect( 25 | eval.apply(parser.parse('f and t'), variables: values) 26 | ).to eq(0) 27 | end 28 | end 29 | 30 | it "multiple operands" do 31 | expect( 32 | eval.apply(parser.parse('f or f or t'), variables: values) 33 | ).to eq(1) 34 | end 35 | 36 | describe "parens" do 37 | it "nested 'or'" do 38 | expect( 39 | eval.apply(parser.parse('(f or t) and t'), variables: values) 40 | ).to eq(1) 41 | end 42 | 43 | it "nested 'and'" do 44 | expect( 45 | eval.apply(parser.parse('(f and t) or f'), variables: values) 46 | ).to eq(0) 47 | end 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /spec/lib/expression/parser_spec.rb: -------------------------------------------------------------------------------- 1 | require 'expression/parser' 2 | 3 | describe Expression::Parser do 4 | 5 | let(:parser) { Expression::Parser.new } 6 | describe 'vars' do 7 | it "parses one" do 8 | expect(parser.parse('one')).to eq(var: 'one') 9 | end 10 | it "preserves case" do 11 | expect(parser.parse('ONe')).to eq(var: 'ONe') 12 | end 13 | it "consumes trailing white space" do 14 | expect(parser.parse('one ')).to eq(var: 'one') 15 | end 16 | end 17 | 18 | it "parses or expression" do 19 | expect(parser.parse('apples or oranges')).to eq(
{or: {left: {var: "apples"}, right: {var: "oranges"}}} 21 | ) 22 | end 23 | 24 | it "parses and expression" do 25 | expect(parser.parse('apples and oranges')).to eq( 26 | {and: {left: {var: "apples"}, right: {var: "oranges"}}} 27 | ) 28 | end 29 | 30 | describe "parens" do 31 | it "nested 'or'" do 32 | expect(parser.parse('(apples or cranberries) and nuts')).to eq( 33 | {:and => { 34 | :left=>{:or=>{:left=>{:var=>"apples"}, :right=>{:var=>"cranberries"}}}, 35 | :right=>{:var=>"nuts"}}} 36 | ) 37 | end 38 | it "nested 'and'" do 39 | expect(parser.parse('(nuts and cranberries) or apples')).to eq( 40 | { or: { 41 | left: { and: { left: {var: "nuts"}, right: {var:"cranberries"}}}, 42 | right: { var: "apples" } 43 | } 44 | } 45 | ) 46 | end 47 | 48 | end 49 | 50 | end 51 | -------------------------------------------------------------------------------- /spec/lib/expression/variables_spec.rb: -------------------------------------------------------------------------------- 1 | require 'expression/parser' 2 | require 'expression/variables' 3 | 4 | describe Expression::Variables do 5 | 6 | let(:parser) { Expression::Parser.new } 7 | let(:variables) { Expression::Variables.new } 8 | it "gets one variable name" do 9 | expect(variables.apply(parser.parse('one'))).to eq(['one']) 10 | end 11 | it "preserves case " do 12 | expect(variables.apply(parser.parse('ONe'))).to eq(['ONe']) 13 | end 14 | it "multiple variables" do 15 | expect(variables.apply(parser.parse('fox or cow or goat'))).to eq(%w[fox cow goat]) 16 | end 17 | 18 | end 19 | -------------------------------------------------------------------------------- /spec/lib/expression_spec.rb: -------------------------------------------------------------------------------- 1 | require 'expression/expression' 2 | 3 | describe Expression do 4 | context "simple or expression" do 5 | it "can find variables" do 6 | expr = "ONE or TWO" 7 | expect(Expression.new(expr).variables).to eq(%w(ONE TWO)) 8 | end 9 | 10 | it "evaluates: 0 OR 1 to be 1" do 11 | expr = "f or t" 12 | values = {f:0, t:1} 13 | expect(Expression.new(expr).evaluate(values)).to eq(1) 14 | end 15 | 16 | it "evaluates: 1 OR 0 to be 1" do 17 | expr = "t or f" 18 | values = {f:0, t:1} 19 | expect(Expression.new(expr).evaluate(values)).to eq(1) 20 | end 21 | 22 | it "evaluates: 0 OR 0 to be 0" do 23 | expr = "f1 or f2" 24 | values = {f1:0, f2:0} 25 | expect(Expression.new(expr).evaluate(values)).to eq(0) 26 | end 27 | 28 | it "evaluates: 1 OR 1 to be 1" do 29 | expr = "t1 or t2" 30 | values = {t1:1, t2:1} 31 | expect(Expression.new(expr).evaluate(values)).to eq(1) 32 | end 33 | 34 | it "evaluates: 1 OR nil to be 1" do 35 | expr = "t1 or t2" 36 | values = {t1:1, t2:nil} 37 | expect(Expression.new(expr).evaluate(values)).to eq(1) 38 | end 39 | 40 | it "evaluates: 0 OR nil to be nil" do 41 | expr = "t1 or t2" 42 | values = {t1:0, t2:nil} 43 | expect(Expression.new(expr).evaluate(values)).to eq(nil) 44 | end 45 | 46 | it "evaluates: nil OR 0 to be 0" do 47 | expr = "t1 or t2" 48 | values = {t1:nil, t2:0} 49 | expect(Expression.new(expr).evaluate(values)).to eq(0) 50 | end 51 | 52 | it "evaluates: nil OR nil to be nil" do 53 | expr = "t1 or t2" 54 | values = {t1:nil, t2:nil} 55 | expect(Expression.new(expr).evaluate(values)).to eq(nil) 56 | end 57 | end 58 | end 59 | -------------------------------------------------------------------------------- /spec/lib/nested_hash_spec.rb: -------------------------------------------------------------------------------- 1 | require 'nested_hash' 2 | 3 | describe 
NestedHash do 4 | let(:input) { {"loc.x" => 1, "loc.y" => 2, "foo.a" => 10, "foo.b" => 20, "foo.c.baz" => 3,}} 5 | let(:expected) {{"loc" => {"x" => 1, "y" => 2}, "foo" => {"a" => 10, "b" => 20, "c" => { "baz" => 3}}}} 6 | let(:expected_updated) {{"loc" => {"x" => 1, "y" => 2}, "foo" => {"a" => 10, "b" => 20, "c" => { "baz" => "buzz"}}}} 7 | 8 | let(:symbol_keys) { {x:1, y:2}} 9 | let(:symbol_keys_result) { {'x' => 1, 'y' => 2}} 10 | 11 | 12 | it ".add creates nested hash elements for string keys with '.'" do 13 | result = NestedHash.new.add(input) 14 | expect(result).to eq(expected) 15 | end 16 | 17 | it "does no harm when initialized with an already nested hash" do 18 | expect(NestedHash.new(expected)).to eq(expected) 19 | end 20 | 21 | context "methods" do 22 | let(:result) { NestedHash.new(input) } 23 | it "can initialize with another Hash" do 24 | expect(result).to eq(expected) 25 | end 26 | 27 | it "can generate dotkeys" do 28 | expect(result.dotkeys.sort).to eq(input.keys.sort) 29 | end 30 | 31 | it "withdotkeys generates keys with '.'" do 32 | expect(result.withdotkeys).to eq(input) 33 | end 34 | 35 | it "dotkeys and withdotkeys have same order" do 36 | expect(result.withdotkeys.keys).to eq(result.dotkeys) 37 | end 38 | 39 | it "can set a nested key's value by a dotted-string" do 40 | result.dotkey_set('foo.c.baz', 'buzz') 41 | expect(result).to eq(expected_updated) 42 | end 43 | end 44 | 45 | 46 | it "turns symbol keys into simple strings" do 47 | result = NestedHash.new.add(symbol_keys) 48 | expect(result).to eq(symbol_keys_result) 49 | end 50 | 51 | context "deeply nested" do 52 | let(:input) { {"info.loc.x" => 0.11, "info.loc.y" => 0.222, "foo.a" => 10, "foo.b" => 20}} 53 | let(:expected) { {"info" => {"loc" => {"x" => 0.11, "y" => 0.222}}, "foo" => {"a" => 10, "b" => 20}}} 54 | 55 | it "creates nested hash elements for string keys with '.'" do 56 | result = NestedHash.new.add(input) 57 | expect(result).to eq(expected) 58 | end 59 | 60 | end 61 | 62 | end 63 | -------------------------------------------------------------------------------- /spec/lib/zipcode_spec.rb: -------------------------------------------------------------------------------- 1 | require 'zipcode/zipcode' 2 | 3 | describe Zipcode do 4 | it "gives a location based on zipcode" do 5 | location = Zipcode.latlon('94132') 6 | expect(location).to eq(lat: 37.7211, lon: -122.4754) 7 | end 8 | it "supports zipcode given as a number" do 9 | location = Zipcode.latlon(94132) 10 | expect(location).to eq(lat: 37.7211, lon: -122.4754) 11 | end 12 | 13 | describe '#valid?' do 14 | it "returns true if the zipcode is valid" do 15 | expect(Zipcode.valid? 94132).to eq(true) 16 | end 17 | it "returns false if the zipcode is invalid" do 18 | expect(Zipcode.valid? '00002').to eq(false) # passed as a string: a bare 00002 literal would be the octal integer 2 19 | end 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /spec/spec.rake: -------------------------------------------------------------------------------- 1 | begin 2 | require 'rspec/core/rake_task' 3 | 4 | spec_tasks = Dir['spec/*/'].each_with_object([]) do |d, result| 5 | result << File.basename(d) unless Dir["#{d}*"].empty?
6 | end 7 | 8 | spec_tasks.each do |folder| 9 | desc "Run the spec suite in #{folder}" 10 | RSpec::Core::RakeTask.new("spec:#{folder}") do |t| 11 | t.pattern = "./spec/#{folder}/**/*_spec.rb" 12 | t.rspec_opts = "--color" 13 | end 14 | end 15 | 16 | desc "Run complete application spec suite" 17 | RSpec::Core::RakeTask.new(:spec) 18 | rescue LoadError 19 | puts "RSpec is not part of this bundle, skipping specs." 20 | end 21 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'simplecov' 2 | SimpleCov.start 3 | 4 | ENV['DATA_PATH'] = nil 5 | ENV['RACK_ENV'] ||= 'test' 6 | RACK_ENV = ENV['RACK_ENV'] unless defined?(RACK_ENV) 7 | 8 | #require File.expand_path(File.dirname(__FILE__) + "/../config/boot") 9 | require_relative '../config/env.rb' 10 | Dir[File.expand_path(File.dirname(__FILE__) + "/../app/helpers/**/*.rb")].each(&method(:require)) 11 | 12 | RSpec.configure do |config| 13 | config.include Rack::Test::Methods 14 | 15 | config.before(:type => :feature) do 16 | # load the Padrino web app defined in app/app.rb 17 | require_relative '../config/boot' 18 | end 19 | config.before do 20 | ENV['DATA_PATH'] = nil 21 | end 22 | end 23 | 24 | # You can use this method to specify a custom Rack app 25 | # for rack-test to invoke: 26 | # 27 | # app OpenDataMaker::App 28 | # app OpenDataMaker::App.tap { |a| } 29 | # app(OpenDataMaker::App) do 30 | # set :foo, :bar 31 | # end 32 | # 33 | def app(app = nil, &blk) 34 | @app ||= block_given? ? app.instance_eval(&blk) : app 35 | @app ||= Padrino.application 36 | end 37 | -------------------------------------------------------------------------------- /spec/tasks/import_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'bundler/setup' 3 | require 'padrino-core/cli/rake' 4 | 5 | describe 'Elasticsearch index management rake task' do 6 | before do 7 | PadrinoTasks.init 8 | DataMagic.init(load_now: true) 9 | end 10 | 11 | after do 12 | DataMagic.destroy 13 | end 14 | 15 | context "imports" do 16 | it "default sample-data" do 17 | ENV['DATA_PATH'] = nil 18 | expect { Rake::Task['import'].invoke }.not_to raise_exception 19 | end 20 | 21 | it "correct configuration" do 22 | dir_path = './spec/fixtures/import_with_dictionary' 23 | ENV['DATA_PATH'] = dir_path 24 | expect { Rake::Task['import'].invoke }.not_to raise_exception 25 | expect(DataMagic.config.api_endpoint_names).to eq(['cities']) 26 | end 27 | 28 | end 29 | 30 | end 31 | -------------------------------------------------------------------------------- /tasks/delta.rake: -------------------------------------------------------------------------------- 1 | require_relative '../lib/data_magic.rb' 2 | 3 | desc 'Delta-update the current ES index with a delta data file. ARG 1 must be the title as specified in data.yaml, e.g., Most-Recent-Cohorts-All-Data-Elements.csv. ARG 2 is the filename of the new delta file nested in a "/delta/" subdirectory.
USAGE: delta[ORIGINAL_FILENAME.csv,DELTA_FILENAME.csv]' 4 | task :delta, [:original, :update] => :environment do |t, args| 5 | options = {} 6 | options[:delta_original] = args[:original] || 'Most-Recent-Cohorts-All-Data-Elements.csv' 7 | options[:delta_update] = args[:update] 8 | DataMagic.import_with_delta(options) 9 | end 10 | -------------------------------------------------------------------------------- /tasks/es.rake: -------------------------------------------------------------------------------- 1 | require_relative '../lib/data_magic.rb' 2 | 3 | namespace :es do 4 | desc "delete elasticsearch index (_all for all)" 5 | task :delete, [:index_name] => :environment do |t, args| 6 | DataMagic.client.indices.delete(index: args[:index_name]) 7 | end 8 | 9 | desc "list elasticsearch indices" 10 | task :list => :environment do |t, args| 11 | result = DataMagic.client.indices.get(index: '_all').keys 12 | puts result.join("\n") 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /tasks/import.rake: -------------------------------------------------------------------------------- 1 | require_relative '../lib/data_magic.rb' 2 | require 'ruby-prof' 3 | 4 | desc "Import files from DATA_PATH via rake import; optionally run rake import[filename.csv] to restart the import from a specific file, or rake import[nil,profile=true] for profiling output." 5 | task :import, [:continue, :profile] => :environment do |t, args| 6 | options = {} 7 | options[:continue] = args[:continue] 8 | start_time = Time.now 9 | RubyProf.start if args[:profile] 10 | 11 | DataMagic.import_with_dictionary(options) 12 | 13 | if args[:profile] 14 | result = RubyProf.stop 15 | end_time = Time.now 16 | puts "indexing complete: #{distance_of_time_in_words(end_time, start_time)}" 17 | puts "duration: #{end_time - start_time}" 18 | 19 | printer = RubyProf::MultiPrinter.new(result) 20 | printer.print(path: ".", profile: "profile", min_percent: 2) 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/.last_run.json: -------------------------------------------------------------------------------- 1 | { 2 | "result": { 3 | "covered_percent": 40.87 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/.resultset.json.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/.resultset.json.lock -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/border.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/border.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/controls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/controls.png --------------------------------------------------------------------------------
/test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/loading.gif -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/loading_background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/colorbox/loading_background.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/favicon_green.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/favicon_green.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/favicon_red.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/favicon_red.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/favicon_yellow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/favicon_yellow.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/loading.gif -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/magnify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/magnify.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_flat_75_ffffff_40x100.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_flat_75_ffffff_40x100.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_65_ffffff_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_65_ffffff_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_75_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_75_dadada_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- 
/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_2e83ff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_2e83ff_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_454545_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_454545_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_888888_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_888888_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_cd0a0a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/10_3_19/assets/0.10.2/smoothness/images/ui-icons_cd0a0a_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/.last_run.json: -------------------------------------------------------------------------------- 1 | { 2 | "result": { 3 | "covered_percent": 45.3 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/.resultset.json.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/.resultset.json.lock -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/border.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/border.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/controls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/controls.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/loading.gif 
-------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/loading_background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/colorbox/loading_background.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/favicon_green.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/favicon_green.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/favicon_red.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/favicon_red.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/favicon_yellow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/favicon_yellow.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/loading.gif -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/magnify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/magnify.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_flat_75_ffffff_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_flat_75_ffffff_40x100.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_65_ffffff_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_65_ffffff_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_75_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_75_dadada_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_2e83ff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_2e83ff_256x240.png -------------------------------------------------------------------------------- 
/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_454545_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_454545_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_888888_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_888888_256x240.png -------------------------------------------------------------------------------- /test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_cd0a0a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RTICWDT/open-data-maker/1a16df4d9ac316bec598c0106fff568dad3568ee/test_coverage_snapshots/3_17_20/assets/0.10.2/smoothness/images/ui-icons_cd0a0a_256x240.png -------------------------------------------------------------------------------- /wait-for-it.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Use this script to test if a given TCP host/port are available 3 | # https://github.com/vishnubob/wait-for-it 4 | 5 | WAITFORIT_cmdname=${0##*/} 6 | 7 | echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi } 8 | 9 | usage() 10 | { 11 | cat << USAGE >&2 12 | Usage: 13 | $WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args] 14 | -h HOST | --host=HOST Host or IP under test 15 | -p PORT | --port=PORT TCP port under test 16 | Alternatively, you specify the host and port as host:port 17 | -s | --strict Only execute subcommand if the test succeeds 18 | -q | --quiet Don't output any status messages 19 | -t TIMEOUT | --timeout=TIMEOUT 20 | Timeout in seconds, zero for no timeout 21 | -- COMMAND ARGS Execute command with args after the test finishes 22 | USAGE 23 | exit 1 24 | } 25 | 26 | wait_for() 27 | { 28 | if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then 29 | echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" 30 | else 31 | echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout" 32 | fi 33 | WAITFORIT_start_ts=$(date +%s) 34 | while : 35 | do 36 | if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then 37 | nc -z $WAITFORIT_HOST $WAITFORIT_PORT 38 | WAITFORIT_result=$? 39 | else 40 | (echo -n > /dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT) >/dev/null 2>&1 41 | WAITFORIT_result=$? 
42 | fi 43 | if [[ $WAITFORIT_result -eq 0 ]]; then 44 | WAITFORIT_end_ts=$(date +%s) 45 | echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds" 46 | break 47 | fi 48 | sleep 1 49 | done 50 | return $WAITFORIT_result 51 | } 52 | 53 | wait_for_wrapper() 54 | { 55 | # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692 56 | if [[ $WAITFORIT_QUIET -eq 1 ]]; then 57 | timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --quiet --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & 58 | else 59 | timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & 60 | fi 61 | WAITFORIT_PID=$! 62 | trap "kill -INT -$WAITFORIT_PID" INT 63 | wait $WAITFORIT_PID 64 | WAITFORIT_RESULT=$? 65 | if [[ $WAITFORIT_RESULT -ne 0 ]]; then 66 | echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" 67 | fi 68 | return $WAITFORIT_RESULT 69 | } 70 | 71 | # process arguments 72 | while [[ $# -gt 0 ]] 73 | do 74 | case "$1" in 75 | *:* ) 76 | WAITFORIT_hostport=(${1//:/ }) 77 | WAITFORIT_HOST=${WAITFORIT_hostport[0]} 78 | WAITFORIT_PORT=${WAITFORIT_hostport[1]} 79 | shift 1 80 | ;; 81 | --child) 82 | WAITFORIT_CHILD=1 83 | shift 1 84 | ;; 85 | -q | --quiet) 86 | WAITFORIT_QUIET=1 87 | shift 1 88 | ;; 89 | -s | --strict) 90 | WAITFORIT_STRICT=1 91 | shift 1 92 | ;; 93 | -h) 94 | WAITFORIT_HOST="$2" 95 | if [[ $WAITFORIT_HOST == "" ]]; then break; fi 96 | shift 2 97 | ;; 98 | --host=*) 99 | WAITFORIT_HOST="${1#*=}" 100 | shift 1 101 | ;; 102 | -p) 103 | WAITFORIT_PORT="$2" 104 | if [[ $WAITFORIT_PORT == "" ]]; then break; fi 105 | shift 2 106 | ;; 107 | --port=*) 108 | WAITFORIT_PORT="${1#*=}" 109 | shift 1 110 | ;; 111 | -t) 112 | WAITFORIT_TIMEOUT="$2" 113 | if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi 114 | shift 2 115 | ;; 116 | --timeout=*) 117 | WAITFORIT_TIMEOUT="${1#*=}" 118 | shift 1 119 | ;; 120 | --) 121 | shift 122 | WAITFORIT_CLI=("$@") 123 | break 124 | ;; 125 | --help) 126 | usage 127 | ;; 128 | *) 129 | echoerr "Unknown argument: $1" 130 | usage 131 | ;; 132 | esac 133 | done 134 | 135 | if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then 136 | echoerr "Error: you need to provide a host and port to test." 137 | usage 138 | fi 139 | 140 | WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15} 141 | WAITFORIT_STRICT=${WAITFORIT_STRICT:-0} 142 | WAITFORIT_CHILD=${WAITFORIT_CHILD:-0} 143 | WAITFORIT_QUIET=${WAITFORIT_QUIET:-0} 144 | 145 | # Check to see if timeout is from busybox? 146 | WAITFORIT_TIMEOUT_PATH=$(type -p timeout) 147 | WAITFORIT_TIMEOUT_PATH=$(realpath $WAITFORIT_TIMEOUT_PATH 2>/dev/null || readlink -f $WAITFORIT_TIMEOUT_PATH) 148 | 149 | WAITFORIT_BUSYTIMEFLAG="" 150 | if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then 151 | WAITFORIT_ISBUSY=1 152 | # Check if busybox timeout uses -t flag 153 | # (recent Alpine versions don't support -t anymore) 154 | if timeout &>/dev/stdout | grep -q -e '-t '; then 155 | WAITFORIT_BUSYTIMEFLAG="-t" 156 | fi 157 | else 158 | WAITFORIT_ISBUSY=0 159 | fi 160 | 161 | if [[ $WAITFORIT_CHILD -gt 0 ]]; then 162 | wait_for 163 | WAITFORIT_RESULT=$? 164 | exit $WAITFORIT_RESULT 165 | else 166 | if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then 167 | wait_for_wrapper 168 | WAITFORIT_RESULT=$? 169 | else 170 | wait_for 171 | WAITFORIT_RESULT=$? 
172 | fi 173 | fi 174 | 175 | if [[ $WAITFORIT_CLI != "" ]]; then 176 | if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then 177 | echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess" 178 | exit $WAITFORIT_RESULT 179 | fi 180 | exec "${WAITFORIT_CLI[@]}" 181 | else 182 | exit $WAITFORIT_RESULT 183 | fi 184 | --------------------------------------------------------------------------------
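Usage note (illustrative): wait-for-it.sh is the piece that lets a container or CI step block until Elasticsearch is reachable before running anything that needs the index. The invocation below is a sketch only; the host, port, timeout, and wrapped command are assumptions for illustration, not values taken from this repository's CI or docker-compose configuration:

# Block until Elasticsearch answers on localhost:9200, waiting up to 60 seconds;
# with --strict, the wrapped command is not executed if the wait times out.
./wait-for-it.sh localhost:9200 --timeout=60 --strict -- bundle exec rake spec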