├── log └── .gitkeep ├── .ruby-version ├── .ruby-gemset ├── Procfile ├── public ├── javascripts │ ├── application.js │ └── jquery-ujs.js └── favicon.ico ├── config ├── unicorn.rb ├── puma.rb ├── env.rb ├── boot.rb └── apps.rb ├── spec ├── fixtures │ ├── geo_no_files │ │ └── data.yaml │ ├── minimal │ │ └── data.yaml │ ├── cities_with_yml │ │ ├── more.csv │ │ ├── data.yml │ │ ├── cities51-100.csv │ │ └── cities50.csv │ ├── import_with_options │ │ ├── more_cities.csv │ │ ├── cities4.csv │ │ └── data.yaml │ ├── cities_without_yml │ │ ├── more.csv │ │ ├── cities51-100.csv │ │ └── cities50.csv │ ├── import_with_dictionary │ │ ├── more.csv │ │ ├── data.yaml │ │ ├── cities51-100.csv │ │ └── cities50.csv │ ├── invalid_utf8.csv │ ├── bom │ │ ├── bom.csv │ │ └── data.yaml │ ├── calculated_columns │ │ ├── schools.csv │ │ └── data.yaml │ ├── types │ │ ├── places.csv │ │ └── data.yaml │ ├── import_with_errors │ │ ├── cities4.csv │ │ └── data.yaml │ ├── import_with_null_value │ │ ├── null_values.csv │ │ └── data.yaml │ ├── school_names │ │ ├── data.yaml │ │ └── school_names.csv │ ├── geo │ │ ├── places.csv │ │ └── data.yaml │ ├── numeric_data │ │ └── data.yaml │ ├── nested_files │ │ ├── school2011.csv │ │ ├── school2012.csv │ │ ├── data.yaml │ │ ├── school2013.csv │ │ └── school-data.csv │ ├── nested2 │ │ ├── data.yaml │ │ └── school2013.csv │ ├── data.rb │ ├── schools │ │ ├── schools.csv │ │ └── data.yaml │ └── sample-data │ │ └── data.yaml ├── lib │ ├── expression │ │ ├── variables_spec.rb │ │ ├── eval_spec.rb │ │ └── parser_spec.rb │ ├── data_magic │ │ ├── index │ │ │ ├── importer_spec.rb │ │ │ ├── event_logger_spec.rb │ │ │ ├── document_spec.rb │ │ │ └── repository_spec.rb │ │ ├── example_spec.rb │ │ ├── import_csv_spec.rb │ │ ├── name_type_spec.rb │ │ ├── calculated_columns_spec.rb │ │ ├── create_index_spec.rb │ │ ├── import_with_nested_files_spec.rb │ │ ├── import_without_data_yaml_spec.rb │ │ ├── config_field_types_spec.rb │ │ ├── search_name_spec.rb │ │ └── config_spec.rb 
│ ├── zipcode_spec.rb │ ├── data_magic_spec.rb │ ├── expression_spec.rb │ └── nested_hash_spec.rb ├── spec.rake ├── tasks │ └── import_spec.rb ├── spec_helper.rb └── features │ └── web_spec.rb ├── doc ├── csv-download.png └── data-overview.png ├── Rakefile ├── .components ├── script ├── bomstrip.sh ├── makeutf8.sh ├── s3push ├── s3pull ├── s3config.rb └── bootstrap ├── config.ru ├── manifest-dev.yml ├── manifest-staging.yml ├── manifest-production.yml ├── manifest-ex.yml ├── manifest-indexing.yml ├── app ├── views │ ├── layouts │ │ └── application.erb │ ├── home.liquid │ └── category.liquid ├── index_app.rb ├── app.rb ├── stylesheets │ └── application.sass └── controllers.rb ├── .rubocop.yml ├── bin └── open-data-maker ├── lib ├── expression │ ├── variables.rb │ ├── eval.rb │ ├── expression.rb │ └── parser.rb ├── data_magic │ ├── category.rb │ ├── example.rb │ ├── index │ │ ├── builder_data.rb │ │ ├── event_logger.rb │ │ ├── document.rb │ │ ├── super_client.rb │ │ ├── output.rb │ │ ├── repository.rb │ │ ├── row_importer.rb │ │ ├── importer.rb │ │ └── document_builder.rb │ ├── index.rb │ ├── error_checker.rb │ └── query_builder.rb ├── sass_initializer.rb ├── zipcode │ └── zipcode.rb └── nested_hash.rb ├── .gitignore ├── tasks ├── es.rake └── import.rake ├── circle.yml ├── NOTES.md ├── Gemfile ├── LICENSE.md ├── DICTIONARY.md ├── notes.txt ├── sample-data └── data.yaml ├── INSTALL.md ├── README.md └── Gemfile.lock /log/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.ruby-version: -------------------------------------------------------------------------------- 1 | 2.2.4 2 | -------------------------------------------------------------------------------- /.ruby-gemset: -------------------------------------------------------------------------------- 1 | open-data-maker 2 | 
-------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: bundle exec puma -C config/puma.rb 2 | -------------------------------------------------------------------------------- /public/javascripts/application.js: -------------------------------------------------------------------------------- 1 | // Put your application scripts here -------------------------------------------------------------------------------- /config/unicorn.rb: -------------------------------------------------------------------------------- 1 | worker_processes 5 2 | timeout 30 3 | preload_app true 4 | -------------------------------------------------------------------------------- /spec/fixtures/geo_no_files/data.yaml: -------------------------------------------------------------------------------- 1 | # data.yaml for geo tests 2 | index: place-data 3 | -------------------------------------------------------------------------------- /spec/fixtures/minimal/data.yaml: -------------------------------------------------------------------------------- 1 | # smallest possible data.yaml 2 | index: my-index 3 | -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/18F/open-data-maker/HEAD/public/favicon.ico -------------------------------------------------------------------------------- /doc/csv-download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/18F/open-data-maker/HEAD/doc/csv-download.png -------------------------------------------------------------------------------- /doc/data-overview.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/18F/open-data-maker/HEAD/doc/data-overview.png -------------------------------------------------------------------------------- /spec/fixtures/cities_with_yml/more.csv: -------------------------------------------------------------------------------- 1 | state,city,lat,lon 2 | CA,Secret City,37.727239,-123.032229 3 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_options/more_cities.csv: -------------------------------------------------------------------------------- 1 | USPS,GEOID,ANSICODE,NAME,POP10 2 | XX,0,0,YY,0 3 | -------------------------------------------------------------------------------- /spec/fixtures/cities_without_yml/more.csv: -------------------------------------------------------------------------------- 1 | state,city,lat,lon 2 | CA,Secret City,37.727239,-123.032229 3 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_dictionary/more.csv: -------------------------------------------------------------------------------- 1 | state,city,lat,lon 2 | CA,Secret City,37.727239,-123.032229 3 | -------------------------------------------------------------------------------- /spec/fixtures/invalid_utf8.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/18F/open-data-maker/HEAD/spec/fixtures/invalid_utf8.csv -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/setup' 2 | require 'padrino-core/cli/rake' 3 | 4 | 5 | task :default => :spec 6 | 7 | PadrinoTasks.init 8 | -------------------------------------------------------------------------------- /spec/fixtures/bom/bom.csv: -------------------------------------------------------------------------------- 1 | UNITID,VAL 2 | 100654,00100200 3 | 
100663,00105200 4 | 100690,02503400 5 | 100706,00105500 6 | 100724,00100500 7 | -------------------------------------------------------------------------------- /spec/fixtures/calculated_columns/schools.csv: -------------------------------------------------------------------------------- 1 | UNITID,INSTNM,INT1,INT2,INT3,INT4 2 | 1,Big School,0,0,2,0 3 | 2,Small School,0,0,0,0 4 | 3,Middle School,0,1,1,0 5 | -------------------------------------------------------------------------------- /spec/fixtures/bom/data.yaml: -------------------------------------------------------------------------------- 1 | version: byte-order-mark 2 | index: test-data 3 | api: test 4 | dictionary: 5 | id: UNITID 6 | value: VAL 7 | 8 | files: 9 | - name: bom.csv 10 | -------------------------------------------------------------------------------- /spec/fixtures/types/places.csv: -------------------------------------------------------------------------------- 1 | id,state,name,lat,lon 2 | ca sf,CA,San Francisco,37.727239,-123.032229 3 | ny ny,NY,New York,40.664274,-73.938500 4 | la no,LA,New Orleans,30.068636,-89.939007 5 | -------------------------------------------------------------------------------- /.components: -------------------------------------------------------------------------------- 1 | --- 2 | :orm: none 3 | :test: rspec 4 | :mock: none 5 | :script: jquery 6 | :renderer: liquid 7 | :stylesheet: sass 8 | :namespace: OpenDataMaker 9 | :migration_format: number 10 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_errors/cities4.csv: -------------------------------------------------------------------------------- 1 | USPS,GEOID,ANSICODE,NAME,POP10 2 | NY,3651000,2395220,New York,8175133 3 | CA,644000,2410877,Los Angeles,3792621 4 | IL,1714000,428803,Chicago,2695598 5 | TX,4835000,2410796,Houston,2099451 -------------------------------------------------------------------------------- 
/spec/fixtures/import_with_options/cities4.csv: -------------------------------------------------------------------------------- 1 | USPS,GEOID,ANSICODE,NAME,POP10 2 | NY,3651000,2395220,New York,8175133 3 | CA,644000,2410877,Los Angeles,3792621 4 | IL,1714000,428803,Chicago,2695598 5 | TX,4835000,2410796,Houston,2099451 -------------------------------------------------------------------------------- /spec/fixtures/import_with_null_value/null_values.csv: -------------------------------------------------------------------------------- 1 | USPS,GEOID,ANSICODE,NAME,POP10 2 | NY,abc123,2395220,New York,8175133 3 | CA,644000,2410877,Los Angeles,3792621 4 | IL,1714000,428803,Chicago,2695598 5 | TX,4835000,2410796,Houston,2099451 6 | -------------------------------------------------------------------------------- /script/bomstrip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | mkdir -p new 3 | 4 | for filename in ./*.csv; do 5 | awk 'NR==1{sub(/^\xef\xbb\xbf/,"")}1' "$filename" > new/$filename 6 | done 7 | 8 | #find . -print0 -type f | awk 'NR==1{sub(/^\xef\xbb\xbf/,"")}1' {} > new/{} 9 | -------------------------------------------------------------------------------- /config.ru: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rackup 2 | # encoding: utf-8 3 | 4 | # This file can be used to start Padrino, 5 | # just execute it from the command line. 
6 | 7 | require File.expand_path("../config/boot.rb", __FILE__) 8 | 9 | run Padrino.application 10 | -------------------------------------------------------------------------------- /spec/fixtures/school_names/data.yaml: -------------------------------------------------------------------------------- 1 | version: 0 2 | index: name-data 3 | api: names 4 | dictionary: 5 | id: ID 6 | school.name: 7 | source: NAME 8 | type: autocomplete 9 | school.state: STATE 10 | 11 | files: 12 | - name: school_names.csv 13 | -------------------------------------------------------------------------------- /script/makeutf8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # convert to utf8 and strip Byte Order Mark (BOM) if present 3 | mkdir -p utf8 4 | 5 | for file in *.csv; do 6 | echo "$file" 7 | iconv -f ascii -t utf-8 "$file" | awk 'NR==1{sub(/^\xef\xbb\xbf/,"")}1' > "./utf8/${file%.txt}" 8 | done 9 | -------------------------------------------------------------------------------- /manifest-dev.yml: -------------------------------------------------------------------------------- 1 | --- 2 | applications: 3 | - name: ccapi-dev 4 | command: bundle exec puma -C ./config/puma.rb 5 | instances: 1 6 | memory: 2G 7 | services: 8 | - bservice 9 | - eservice 10 | env: 11 | MAX_THREADS: 5 12 | WEB_CONCURRENCY: 3 13 | -------------------------------------------------------------------------------- /manifest-staging.yml: -------------------------------------------------------------------------------- 1 | --- 2 | applications: 3 | - name: ccapi-staging 4 | command: bundle exec puma -C ./config/puma.rb 5 | instances: 3 6 | memory: 2G 7 | services: 8 | - bservice 9 | - eservice 10 | env: 11 | MAX_THREADS: 5 12 | WEB_CONCURRENCY: 3 13 | -------------------------------------------------------------------------------- /manifest-production.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 
applications: 3 | - name: ccapi-production 4 | command: bundle exec puma -C ./config/puma.rb 5 | instances: 3 6 | memory: 2G 7 | services: 8 | - bservice 9 | - eservice 10 | env: 11 | MAX_THREADS: 5 12 | WEB_CONCURRENCY: 3 13 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_null_value/data.yaml: -------------------------------------------------------------------------------- 1 | index: city-data 2 | api: cities 3 | unique: ['name'] 4 | null_value: 'abc123' 5 | options: 6 | columns: all 7 | 8 | dictionary: 9 | state: USPS 10 | population: POP10 11 | name: NAME 12 | 13 | files: 14 | - name: null_values.csv 15 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_errors/data.yaml: -------------------------------------------------------------------------------- 1 | version: fixture-type-error 2 | index: expect-errors 3 | api: nothing 4 | 5 | dictionary: 6 | state: USPS 7 | name: NAME 8 | population: 9 | source: POP10 10 | type: broken 11 | 12 | files: 13 | - name: cities4.csv 14 | add: 15 | year: 2010 16 | -------------------------------------------------------------------------------- /manifest-ex.yml: -------------------------------------------------------------------------------- 1 | --- 2 | applications: 3 | - name: ccapi-ex 4 | command: bundle exec puma -C ./config/puma.rb 5 | instances: 1 6 | memory: 2G 7 | services: 8 | - bservice 9 | - eservice 10 | env: 11 | MAX_THREADS: 5 12 | WEB_CONCURRENCY: 1 13 | INDEX_APP: enable 14 | NPROCS: 2 15 | -------------------------------------------------------------------------------- /config/puma.rb: -------------------------------------------------------------------------------- 1 | workers Integer(ENV['WEB_CONCURRENCY'] || 2) 2 | threads_count = Integer(ENV['MAX_THREADS'] || 5) 3 | threads threads_count, threads_count 4 | worker_timeout 30 5 | 6 | preload_app! 
7 | 8 | rackup DefaultRackup 9 | port ENV['PORT'] || 3000 10 | environment ENV['RACK_ENV'] || 'development' 11 | -------------------------------------------------------------------------------- /manifest-indexing.yml: -------------------------------------------------------------------------------- 1 | --- 2 | applications: 3 | - name: ccapi-indexing 4 | command: bundle exec puma -C ./config/puma.rb 5 | instances: 1 6 | memory: 2G 7 | services: 8 | - bservice 9 | - eservice 10 | env: 11 | MAX_THREADS: 5 12 | WEB_CONCURRENCY: 1 13 | INDEX_APP: enable 14 | NPROCS: 2 15 | -------------------------------------------------------------------------------- /app/views/layouts/application.erb: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | <%== yield %> 10 | 11 | 12 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | AllCops: 2 | Exclude: 3 | - 'bin/**/*' 4 | - 'db/**/*' 5 | Metrics/LineLength: 6 | Enabled: false 7 | Style/CommentAnnotation: 8 | Enabled: false 9 | Style/Documentation: 10 | Enabled: false 11 | Style/DotPosition: 12 | Enabled: false 13 | Style/RedundantSelf: 14 | Enabled: false 15 | Style/StringLiterals: 16 | Enabled: false 17 | -------------------------------------------------------------------------------- /spec/fixtures/school_names/school_names.csv: -------------------------------------------------------------------------------- 1 | ID,STATE,NAME 2 | 1,AL,Stillman College 3 | 2,NY,New York University 4 | 3,AZ,Arizona State University 5 | 4,CA,University of California-Berkeley 6 | 5,MA,Berklee College of Music 7 | 6,NY,Berk Trade and Business School 8 | 7,AZ,University of Phoenix-Online Campus 9 | 8,AZ,University of Phoenix-Phoenix Campus 10 | 9,AZ,Phoenix College 11 | -------------------------------------------------------------------------------- 
/spec/fixtures/geo/places.csv: -------------------------------------------------------------------------------- 1 | state,city,lat,lon 2 | CA,"San Francisco",37.727239,-123.032229 3 | NY,"New York",40.664274,-73.938500 4 | CA,"Los Angeles",34.019394,-118.410825 5 | IL,Chicago,41.837551,-87.681844 6 | TX,Houston,29.780472,-95.386342 7 | PA,Philadelphia,40.009376,-75.133346 8 | CA,"San Jose",37.296867,-121.819306 9 | MA,Boston,42.331960,-71.020173 10 | WA,Seattle,47.620499,-122.350876 11 | -------------------------------------------------------------------------------- /spec/fixtures/numeric_data/data.yaml: -------------------------------------------------------------------------------- 1 | # cities100.txt 2 | # Test YAML file 3 | index: numeric-data 4 | api: cities 5 | 6 | dictionary: 7 | name: 8 | source: name 9 | type: string 10 | address: 11 | source: address 12 | type: string 13 | city: 14 | source: city 15 | type: string 16 | age: 17 | source: age 18 | type: integer 19 | height: 20 | source: height 21 | type: float 22 | -------------------------------------------------------------------------------- /bin/open-data-maker: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | Dir.chdir(File.dirname(__FILE__)+'/..') 4 | 5 | # Start the app with Padrino::Server 6 | require 'rubygems' 7 | require 'bundler/setup' 8 | require 'padrino-core/cli/launcher' 9 | 10 | ARGV.unshift('start') if ARGV.first.nil? 
|| ARGV.first.start_with?('-') 11 | Padrino::Cli::Launcher.start ARGV 12 | 13 | # Start the app with Rack::Server 14 | #require "rack" 15 | #Rack::Server.start 16 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_options/data.yaml: -------------------------------------------------------------------------------- 1 | version: fixture-import-options 2 | index: city-data 3 | api: cities 4 | options: 5 | columns: all 6 | limit_files: 1 7 | limit_rows: 3 8 | 9 | dictionary: 10 | state: USPS 11 | name: NAME 12 | population: POP10 13 | 14 | files: 15 | - name: cities4.csv 16 | add: 17 | year: 2010 18 | - name: more_cities.csv # this shouldn't get imported 19 | add: 20 | year: 1000 21 | -------------------------------------------------------------------------------- /lib/expression/variables.rb: -------------------------------------------------------------------------------- 1 | require 'parslet' 2 | 3 | class Expression 4 | class Variables < Parslet::Transform 5 | rule(:var => simple(:var)) { 6 | [String(var)] 7 | } 8 | rule(:or => { :left => subtree(:left), :right => subtree(:right) }) do 9 | (left + right) 10 | end 11 | 12 | rule(:and => { :left => subtree(:left), :right => subtree(:right) }) do 13 | (left + right) 14 | end 15 | 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | log/**/* 3 | log/*.log 4 | tmp/**/* 5 | vendor/gems/* 6 | !vendor/gems/cache/ 7 | .sass-cache/* 8 | db/*.db 9 | .*.sw* 10 | .env 11 | .*.env 12 | .cfignore 13 | .vagrant 14 | .idea/ 15 | *profile* 16 | 17 | public/stylesheets/application.css* 18 | 19 | # expect people to put their own data in /data 20 | data 21 | 22 | # another commonly used data directory 23 | real-data 24 | 25 | # contains Google API tokens 26 | client_secret.json 27 | 
-------------------------------------------------------------------------------- /script/s3push: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby -v 2 | 3 | require_relative 's3config.rb' 4 | 5 | @s3 = ::Aws::S3::Client.new 6 | 7 | dirname = 'real-data' 8 | bucket_name = ENV['s3_bucket'] 9 | datayamlpath = File.expand_path("../../#{dirname}/#{bucket_name}.yaml", __FILE__) 10 | 11 | puts "copying #{datayamlpath}" 12 | puts "to S3 #{bucket_name}" 13 | File.open(datayamlpath, 'r') do |file| 14 | @s3.put_object(bucket: bucket_name, key: 'data.yaml', body: file) 15 | end 16 | -------------------------------------------------------------------------------- /tasks/es.rake: -------------------------------------------------------------------------------- 1 | require 'data_magic' 2 | 3 | namespace :es do 4 | desc "delete elasticsearch index (_all for all)" 5 | task :delete, [:index_name] => :environment do |t, args| 6 | DataMagic.client.indices.delete(index: args[:index_name]) 7 | end 8 | 9 | desc "list elasticsearch indices" 10 | task :list => :environment do |t, args| 11 | result = DataMagic.client.indices.get(index: '_all').keys 12 | puts result.join("\n") 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /script/s3pull: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby -v 2 | 3 | require_relative 's3config.rb' 4 | 5 | @s3 = ::Aws::S3::Client.new 6 | 7 | bucket = ENV['s3_bucket'] 8 | 9 | dirname = 'real-data' 10 | unless File.directory?(dirname) 11 | FileUtils.mkdir_p(dirname) 12 | end 13 | datayamlpath = File.expand_path("../../#{dirname}/#{bucket}.yaml", __FILE__) 14 | 15 | File.open(datayamlpath, 'w') do |file| 16 | response = @s3.get_object(bucket: bucket, key: 'data.yaml') 17 | file << response.body.read 18 | end 19 | -------------------------------------------------------------------------------- 
/app/index_app.rb: -------------------------------------------------------------------------------- 1 | require 'csv' 2 | 3 | module OpenDataMaker 4 | 5 | class IndexApp < Padrino::Application 6 | register SassInitializer 7 | register Padrino::Helpers 8 | 9 | enable :sessions 10 | 11 | get '/' do 12 | DataMagic.config.scoped_index_name 13 | end 14 | 15 | get '/init' do 16 | DataMagic.init(load_now: true) 17 | "ok" 18 | end 19 | 20 | get '/reindex' do 21 | DataMagic.reindex 22 | "reindexing..." 23 | end 24 | end 25 | 26 | end 27 | -------------------------------------------------------------------------------- /lib/data_magic/category.rb: -------------------------------------------------------------------------------- 1 | Category = Struct.new(:category_id) do 2 | def assemble 3 | category_entry = DataMagic.config.data['categories'][category_id] 4 | dictionary = DataMagic.config.dictionary 5 | field_details = {} 6 | category_entry['fields'].each do |field_name| 7 | field_details[field_name] = dictionary[field_name] || { "description"=>"" } 8 | end 9 | field_details = { "field_details" => field_details } 10 | assemble = category_entry.merge(field_details) 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /lib/expression/eval.rb: -------------------------------------------------------------------------------- 1 | 2 | class Expression 3 | class Eval < Parslet::Transform 4 | rule(:var => simple(:var)) { 5 | variables[String(var)] 6 | } 7 | 8 | # in Ruby 0 is 'truthy' but that's not what most people expect 9 | rule(:or => { :left => subtree(:left), :right => subtree(:right) }) do 10 | left == 0 ? right : (left or right) 11 | end 12 | 13 | rule(:and => { :left => subtree(:left), :right => subtree(:right) }) do 14 | left == 0 ? 
left : (left and right) 15 | end 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /lib/sass_initializer.rb: -------------------------------------------------------------------------------- 1 | module SassInitializer 2 | def self.registered(app) 3 | # Enables support for SASS template reloading in rack applications. 4 | # See http://nex-3.com/posts/88-sass-supports-rack for more details. 5 | # Store SASS files (by default) within 'app/stylesheets'. 6 | require 'sass/plugin/rack' 7 | Sass::Plugin.options[:template_location] = Padrino.root("app/stylesheets") 8 | Sass::Plugin.options[:css_location] = Padrino.root("public/stylesheets") 9 | app.use Sass::Plugin::Rack 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /lib/data_magic/example.rb: -------------------------------------------------------------------------------- 1 | class Example < Hashie::Mash 2 | include Hashie::Extensions::Coercion 3 | include Hashie::Extensions::MergeInitializer 4 | coerce_key :name, String 5 | coerce_key :description, String 6 | coerce_key :params, String 7 | coerce_key :endpoint, String 8 | coerce_key :link, String 9 | def initialize(hash = {}) 10 | super 11 | # we want to use this in a liquid template 12 | # so all attributes needs to be plain data, not code 13 | self[:link] = "/v1/#{endpoint}?#{params}" if self[:link].nil? 
14 | end 15 | 16 | end 17 | -------------------------------------------------------------------------------- /spec/fixtures/geo/data.yaml: -------------------------------------------------------------------------------- 1 | 2 | 3 | # cities100.txt 4 | # National Places Gazetteer Files, from US Census 2010 5 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 6 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 7 | # head -n 101 results.txt > cities100.txt 8 | # then convertes to csv and removed " city" from after each city name 9 | dictionary: 10 | city: city 11 | location.lat: lat 12 | location.lon: lon 13 | 14 | index: place-data 15 | api: places 16 | files: 17 | - name: places.csv 18 | -------------------------------------------------------------------------------- /lib/data_magic/index/builder_data.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class BuilderData 4 | attr_reader :data, :options 5 | 6 | def initialize(data, options) 7 | @options = options 8 | @data = data 9 | end 10 | 11 | def additional_fields 12 | options[:mapping] || {} 13 | end 14 | 15 | def new_field_names 16 | field_names = options[:fields] || {} 17 | field_names.merge(additional_fields) 18 | end 19 | 20 | def additional_data 21 | options[:add_data] 22 | end 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /spec/lib/expression/variables_spec.rb: -------------------------------------------------------------------------------- 1 | require 'expression/parser' 2 | require 'expression/variables' 3 | 4 | describe Expression::Variables do 5 | 6 | let(:parser) { Expression::Parser.new } 7 | let(:variables) { Expression::Variables.new } 8 | it "gets one variable name" do 9 | expect(variables.apply(parser.parse('one'))).to eq(['one']) 10 | end 11 | it "preserves case " do 12 | 
expect(variables.apply(parser.parse('ONe'))).to eq(['ONe']) 13 | end 14 | it "multiple variables" do 15 | expect(variables.apply(parser.parse('fox or cow or goat'))).to eq(%w[fox cow goat]) 16 | end 17 | 18 | end 19 | -------------------------------------------------------------------------------- /spec/spec.rake: -------------------------------------------------------------------------------- 1 | begin 2 | require 'rspec/core/rake_task' 3 | 4 | spec_tasks = Dir['spec/*/'].each_with_object([]) do |d, result| 5 | result << File.basename(d) unless Dir["#{d}*"].empty? 6 | end 7 | 8 | spec_tasks.each do |folder| 9 | desc "Run the spec suite in #{folder}" 10 | RSpec::Core::RakeTask.new("spec:#{folder}") do |t| 11 | t.pattern = "./spec/#{folder}/**/*_spec.rb" 12 | t.rspec_opts = "--color" 13 | end 14 | end 15 | 16 | desc "Run complete application spec suite" 17 | RSpec::Core::RakeTask.new(:spec) 18 | rescue LoadError 19 | puts "RSpec is not part of this bundle, skip specs." 20 | end 21 | -------------------------------------------------------------------------------- /config/env.rb: -------------------------------------------------------------------------------- 1 | # define core environment that we need in tests and for the app 2 | 3 | # Defines our constants 4 | ENV['RACK_ENV'] ||= 'development' 5 | RACK_ENV = ENV['RACK_ENV'] unless defined?(RACK_ENV) 6 | PADRINO_ROOT = File.expand_path('../..', __FILE__) unless defined?(PADRINO_ROOT) 7 | 8 | # Load our dependencies 9 | require 'rubygems' unless defined?(Gem) 10 | require 'bundler/setup' 11 | require 'newrelic_rpm' 12 | Bundler.require(:default, RACK_ENV) 13 | 14 | # do this early so we can log during startup 15 | require './lib/data_magic/config.rb' 16 | DataMagic::Config.logger=Logger.new(STDOUT) if ENV['VCAP_APPLICATION'] # Cloud Foundry 17 | -------------------------------------------------------------------------------- /spec/lib/data_magic/index/importer_spec.rb: 
-------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic::Index::Importer" do 5 | before do 6 | ENV['DATA_PATH'] = './spec/fixtures/minimal' 7 | DataMagic.init(load_now: false) 8 | end 9 | after do 10 | DataMagic.destroy 11 | end 12 | 13 | it "indexes in parallel based on NPROCS" do 14 | stub_const('ENV', { 'NPROCS' => '2' }) 15 | 16 | data_str = <<-eos 17 | a,b 18 | 1,2 19 | 3,4 20 | eos 21 | data = StringIO.new(data_str) 22 | num_rows, fields = DataMagic.import_csv(data) 23 | expect(num_rows).to be(2) 24 | expect(fields).to eq(['a', 'b']) 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /spec/lib/zipcode_spec.rb: -------------------------------------------------------------------------------- 1 | require 'zipcode/zipcode' 2 | 3 | describe Zipcode do 4 | it "gives a location based on zipcode" do 5 | location = Zipcode.latlon('94132') 6 | expect(location).to eq(lat: 37.7211, lon: -122.4754) 7 | end 8 | it "supports zipcode given as a number" do 9 | location = Zipcode.latlon(94132) 10 | expect(location).to eq(lat: 37.7211, lon: -122.4754) 11 | end 12 | 13 | describe '#valid' do 14 | it "returns true if the zipcode is valid" do 15 | expect(Zipcode.valid? 94132).to eq(true) 16 | end 17 | it "returns false if the zipcode is invalid" do 18 | expect(Zipcode.valid? 
00002).to eq(false) 19 | end 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /spec/fixtures/types/data.yaml: -------------------------------------------------------------------------------- 1 | 2 | version: 0 3 | # cities100.txt 4 | # National Places Gazetteer Files, from US Census 2010 5 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 6 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 7 | # head -n 101 results.txt > cities100.txt 8 | # then convertes to csv and removed " city" from after each city name 9 | dictionary: 10 | id: 11 | source: id 12 | type: literal 13 | city.name: 14 | source: name 15 | type: name 16 | city.state: state 17 | location.lat: lat 18 | location.lon: lon 19 | 20 | index: place-data 21 | api: places 22 | files: 23 | - name: places.csv 24 | -------------------------------------------------------------------------------- /tasks/import.rake: -------------------------------------------------------------------------------- 1 | require 'data_magic' 2 | require 'ruby-prof' 3 | 4 | desc "import files from DATA_PATH, rake import[profile=true] for profile output" 5 | task :import, [:profile] => :environment do |t, args| 6 | options = {} 7 | start_time = Time.now 8 | RubyProf.start if args[:profile] 9 | 10 | DataMagic.import_with_dictionary(options) 11 | 12 | if args[:profile] 13 | result = RubyProf.stop 14 | end_time = Time.now 15 | puts "indexing complete: #{distance_of_time_in_words(end_time, start_time)}" 16 | puts "duration: #{end_time - start_time}" 17 | 18 | printer = RubyProf::MultiPrinter.new(result); 19 | printer.print(path: ".", profile: "profile", min_percent: 2) 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /spec/fixtures/cities_with_yml/data.yml: -------------------------------------------------------------------------------- 1 | # cities100.txt 2 | # 
National Places Gazetteer Files, from US Census 2010 3 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 4 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 5 | # head -n 101 results.txt > cities100.txt 6 | # then convertes to csv and removed " city" from after each city name 7 | version: fixture-import-all 8 | index: city-data 9 | api: cities 10 | global_mapping: 11 | USPS: state 12 | NAME: name 13 | POP10: population 14 | INTPTLAT: latitude 15 | INTPTLONG: longitude 16 | 17 | files: 18 | - name: cities50.csv 19 | add: 20 | category: 'top50' 21 | - name: cities51-100.csv 22 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_dictionary/data.yaml: -------------------------------------------------------------------------------- 1 | # cities100.txt 2 | # National Places Gazetteer Files, from US Census 2010 3 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 4 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 5 | # head -n 101 results.txt > cities100.txt 6 | # then convertes to csv and removed " city" from after each city name 7 | version: fixture-import-all 8 | index: city-data 9 | api: cities 10 | dictionary: 11 | state: USPS 12 | name: NAME 13 | population: POP10 14 | latitude: INTPTLAT 15 | longitude: INTPTLONG 16 | 17 | files: 18 | - name: cities50.csv 19 | add: 20 | category: 'top50' 21 | - name: cities51-100.csv 22 | -------------------------------------------------------------------------------- /lib/expression/expression.rb: -------------------------------------------------------------------------------- 1 | require_relative 'parser' 2 | require_relative 'eval' 3 | require_relative 'variables' 4 | require 'hashie' 5 | 6 | class Expression 7 | attr_accessor :name # purely for reporting Errors 8 | attr_reader :variables 9 | 10 | def initialize(expr, name = 
'unknown') 11 | @tree = Parser.new.parse(expr) 12 | @variables = Variables.new.apply(@tree) 13 | end 14 | 15 | def evaluate(vars) 16 | Hashie.stringify_keys! vars 17 | Eval.new.apply(@tree, variables: vars) 18 | end 19 | 20 | def self.find_or_create(expr, name = 'unknown') 21 | @cached_expression ||= {} 22 | @cached_expression[expr] ||= Expression.new(expr, name) 23 | @cached_expression[expr] 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /spec/lib/data_magic/example_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe Example do 4 | let(:hash) do 5 | { name: 'foo', 6 | description: 'interesting thing', 7 | params: 'a=1&b=something', 8 | endpoint: 'api' } 9 | end 10 | subject(:e) { Example.new(hash) } 11 | 12 | it "has a name" do 13 | expect(e.name).to eq(hash[:name]) 14 | end 15 | it "has a description" do 16 | expect(e.description).to eq(hash[:description]) 17 | end 18 | it "has a params" do 19 | expect(e.params).to eq(hash[:params]) 20 | end 21 | it "has an endpoint" do 22 | expect(e.endpoint).to eq(hash[:endpoint]) 23 | end 24 | 25 | it "has a link" do 26 | expect(e.link).to eq("/v1/#{e.endpoint}?#{e.params}") 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/school2011.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1025,4048,0.92 3 | 2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,866,45556,0.34 4 | 3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,453,4675,0.71 5 | 4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,572,15466,0.34 6 | 5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1111,11266,0.86 7 | 
6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,818,23357,0.58 8 | 7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1392,32584,0.39 9 | 8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,718,252,0.26 10 | 9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1297,36088,0.63 11 | 10,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,635,3259,0.70 12 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | cache_directories: 3 | - elasticsearch-1.7.1 4 | pre: 5 | - curl -v -L -o cf-cli_amd64.deb 'https://cli.run.pivotal.io/stable?release=debian64&source=github' 6 | - sudo dpkg -i cf-cli_amd64.deb 7 | - cf -v 8 | post: 9 | - if [[ ! -e elasticsearch-1.7.1 ]]; then wget https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-1.7.1.tar.gz && tar -xvf elasticsearch-1.7.1.tar.gz; fi 10 | - elasticsearch-1.7.1/bin/elasticsearch: {background: true} 11 | 12 | test: 13 | post: 14 | - cf api https://api.cloud.gov 15 | - cf auth $CF_USER $CF_PASSWORD 16 | - cf target -o ed -s dev 17 | - cf a 18 | 19 | deployment: 20 | development: 21 | branch: dev 22 | commands: 23 | - cf push -f manifest-dev.yml 24 | -------------------------------------------------------------------------------- /lib/data_magic/index/event_logger.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class EventLogger 4 | def trigger(event, *args) 5 | self.send(event, *args) 6 | end 7 | 8 | ['debug', 'info', 'warn', 'error'].each do |level| 9 | class_eval <<-RUBY, __FILE__, __LINE__ + 1 10 | def #{level}(message, object=nil, limit=nil) 11 | logger.#{level}(full_message(message, object, limit)) 12 | end 13 | RUBY 14 | end 15 | 16 | def full_message(prefix, object, limit) 17 | return prefix unless object 18 | message = "#{prefix}: " 19 | if limit 20 | message << object.inspect[0..limit] 21 | else 22 | message << object.inspect 23 | 
end 24 | message 25 | end 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/school2012.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,461,35231,0.01 3 | 2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,986,34095,0.71 4 | 3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1094,42579,0.39 5 | 4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,854,37589,0.15 6 | 5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,650,13611,0.04 7 | 6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,797,36924,0.64 8 | 7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,994,31799,0.60 9 | 8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1420,30063,0.97 10 | 9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,1292,42150,0.83 11 | 10,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,605,2608,0.92 12 | 11,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,2608,0.92 13 | -------------------------------------------------------------------------------- /spec/tasks/import_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'bundler/setup' 3 | require 'padrino-core/cli/rake' 4 | 5 | describe 'elastic search index management rake task' do 6 | before do 7 | PadrinoTasks.init 8 | DataMagic.init(load_now: true) 9 | end 10 | 11 | after do 12 | DataMagic.destroy 13 | end 14 | 15 | context "imports" do 16 | it "default sample-data" do 17 | ENV['DATA_PATH'] = nil 18 | expect { Rake::Task['import'].invoke }.not_to raise_exception 19 | end 20 | 21 | it "correct configuration" do 22 | dir_path = './spec/fixtures/import_with_dictionary' 23 | ENV['DATA_PATH'] = dir_path 24 | expect { Rake::Task['import'].invoke }.not_to raise_exception 25 | expect(DataMagic.config.api_endpoint_names).to eq(['cities']) 26 | 
end 27 | 28 | end 29 | 30 | end 31 | -------------------------------------------------------------------------------- /spec/fixtures/calculated_columns/data.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: Aug6-2015-08-10-23:48-0600 3 | api: fakeschool 4 | index: fakeschool-data 5 | unique: 6 | - id 7 | options: 8 | limit_files: 1 9 | limit_rows: 100 10 | 11 | dictionary: 12 | id: 13 | source: UNITID 14 | type: integer 15 | description: Unit ID for institution 16 | school.name: 17 | source: INSTNM 18 | description: Institution name 19 | integer1: 20 | source: INT1 21 | type: integer 22 | integer2: 23 | source: INT2 24 | type: integer 25 | integer3: 26 | source: INT3 27 | type: integer 28 | integer4: 29 | source: INT4 30 | type: integer 31 | summarybool: 32 | calculate: INT1 or INT2 or INT3 or INT4 33 | type: boolean 34 | description: are any of the unparsed booleans true? 35 | 36 | files: 37 | - name: schools.csv 38 | -------------------------------------------------------------------------------- /spec/lib/data_magic/import_csv_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic #import_csv" do 5 | before do 6 | ENV['DATA_PATH'] = './spec/fixtures/minimal' 7 | DataMagic.init(load_now: false) 8 | end 9 | after do 10 | DataMagic.destroy 11 | #expect(DataMagic.client.indices.get(index: '_all')).to be_empty 12 | end 13 | 14 | it "throws errors for bad format" do 15 | data = StringIO.new("not csv format") 16 | expect{DataMagic.import_csv(data)}.to raise_error(DataMagic::InvalidData) 17 | end 18 | 19 | it "reads file and reports number of rows and headers" do 20 | data_str = <<-eos 21 | a,b 22 | 1,2 23 | 3,4 24 | eos 25 | data = StringIO.new(data_str) 26 | num_rows, fields = DataMagic.import_csv(data) 27 | expect(num_rows).to be(2) 28 | expect(fields).to eq(['a', 'b']) 29 | end 30 | 31 | end 32 | 
-------------------------------------------------------------------------------- /lib/data_magic/index/document.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class Document 4 | attr_reader :data, :id 5 | 6 | def initialize(data) 7 | @data = data 8 | @id = calculate_id 9 | end 10 | 11 | def remove_ids 12 | config.data['unique'].each { |key| data.delete key } 13 | end 14 | 15 | def headers 16 | data.keys.map(&:to_s) # does this only return top level fields? 17 | end 18 | 19 | def preview(n=500) 20 | data.inspect[0..n] 21 | end 22 | 23 | def id_empty? 24 | id && id.empty? 25 | end 26 | 27 | private 28 | 29 | def calculate_id 30 | return nil if config.data['unique'].length == 0 31 | config.data['unique'].map { |field| data[field] }.join(':') 32 | end 33 | 34 | def config 35 | DataMagic.config 36 | end 37 | end 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /app/app.rb: -------------------------------------------------------------------------------- 1 | require 'csv' 2 | 3 | module OpenDataMaker 4 | class App < Padrino::Application 5 | register SassInitializer 6 | register Padrino::Helpers 7 | 8 | # This app is stateless and session cookies prevent caching of API responses 9 | disable :sessions 10 | 11 | # This app has no sensitive bits and csrf protection requires sessions 12 | disable :protect_from_csrf 13 | 14 | if ENV['DATA_AUTH'] and not ENV['DATA_AUTH'].empty? 
15 | auth = ENV['DATA_AUTH'] 16 | authorized_user, authorized_pass = auth.split(',') 17 | use Rack::Auth::Basic, "Restricted Area" do |username, password| 18 | username == authorized_user and password == authorized_pass 19 | end 20 | end 21 | 22 | ## app setup 23 | if ENV['RACK_ENV'] == 'test' 24 | DataMagic.init(load_now: true) 25 | else 26 | DataMagic.init(load_now: false) # don't index data 27 | end 28 | 29 | end 30 | 31 | end 32 | -------------------------------------------------------------------------------- /lib/data_magic/index/super_client.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | module DataMagic 4 | module Index 5 | class SuperClient 6 | attr_reader :client, :options 7 | 8 | def initialize(client, options) 9 | @client = client 10 | @options = options 11 | end 12 | 13 | def create_index 14 | DataMagic.create_index unless config.index_exists? 15 | end 16 | 17 | def refresh_index 18 | client.indices.refresh index: index_name 19 | end 20 | 21 | def creating? 22 | options[:nest] == nil 23 | end 24 | 25 | def allow_skips? 
26 | options[:nest][:parent_missing] == 'skip' 27 | end 28 | 29 | def index_name 30 | config.scoped_index_name 31 | end 32 | 33 | def config 34 | DataMagic.config 35 | end 36 | 37 | extend Forwardable 38 | 39 | def_delegators :client, :index, :update 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /config/boot.rb: -------------------------------------------------------------------------------- 1 | require_relative 'env.rb' 2 | 3 | ## 4 | # ## Enable devel logging 5 | # 6 | # Padrino::Logger::Config[:development][:log_level] = :devel 7 | # Padrino::Logger::Config[:development][:log_static] = true 8 | # 9 | # ## Configure your I18n 10 | # 11 | # I18n.default_locale = :en 12 | # I18n.enforce_available_locales = false 13 | # 14 | # ## Configure your HTML5 data helpers 15 | # 16 | # Padrino::Helpers::TagHelpers::DATA_ATTRIBUTES.push(:dialog) 17 | # text_field :foo, :dialog => true 18 | # Generates: 19 | # 20 | # ## Add helpers to mailer 21 | # 22 | # Mail::Message.class_eval do 23 | # include Padrino::Helpers::NumberHelpers 24 | # include Padrino::Helpers::TranslationHelpers 25 | # end 26 | 27 | ## 28 | # Add your before (RE)load hooks here 29 | # 30 | Padrino.before_load do 31 | end 32 | 33 | ## 34 | # Add your after (RE)load hooks here 35 | # 36 | Padrino.after_load do 37 | end 38 | 39 | Padrino.load! 
40 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/data.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | api: school 3 | index: fake-nested 4 | unique: [id] 5 | 6 | dictionary: 7 | id: UNITID 8 | name: 9 | source: INSTNM 10 | type: literal 11 | city: CITY_MAIN 12 | state: STABBR_MAIN 13 | zipcode: ZIP_MAIN 14 | sat_average: SAT_AVG 15 | location.lat: LATITUDE_MAIN 16 | location.lon: LONGITUDE_MAIN 17 | 18 | earnings.6_yrs_after_entry.median: 19 | source: earn_2002_p10 20 | description: Median earnings of students 21 | type: integer 22 | 23 | earnings.6_yrs_after_entry.percent_gt_25k: 24 | source: gt_25k_2006_p6 25 | description: Share of students earning over $25,000/year 26 | type: float 27 | 28 | files: 29 | - name: school-data.csv 30 | only: [id, name, city, state] 31 | - name: school2013.csv 32 | nest: 33 | key: 2013 34 | contents: [earnings, sat_average] 35 | - name: school2012.csv 36 | nest: 37 | key: 2012 38 | contents: [earnings, sat_average] 39 | -------------------------------------------------------------------------------- /spec/fixtures/nested2/data.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: Aug6-2015-08-10-23:48-0600 3 | api: fakeschool 4 | index: fakeschool-data 5 | unique: 6 | - id 7 | options: 8 | # columns: all 9 | limit_files: 1 10 | limit_rows: 100 11 | search: dictionary_only 12 | 13 | dictionary: 14 | id: 15 | source: UNITID 16 | type: integer 17 | description: Unit ID for institution 18 | ope8_id: 19 | source: OPEID 20 | type: integer 21 | description: 8-digit OPE ID for institution 22 | ope6_id: 23 | source: opeid6 24 | type: integer 25 | description: 6-digit OPE ID for institution 26 | school.name: 27 | source: INSTNM 28 | type: literal 29 | description: Institution name 30 | school.city: 31 | source: CITY_MAIN 32 | description: City 33 | school.state: 34 | 
source: STABBR_MAIN 35 | description: State postcode 36 | school.zip: 37 | source: ZIP_MAIN 38 | type: integer 39 | description: ZIP code 40 | 41 | files: 42 | - name: school2013.csv 43 | -------------------------------------------------------------------------------- /spec/lib/data_magic_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | require 'fixtures/data.rb' 4 | 5 | describe DataMagic do 6 | it "cleans up after itself" do 7 | DataMagic.init(load_now: true) 8 | DataMagic.destroy 9 | DataMagic.logger.info "just destroyed" 10 | #expect(DataMagic.client.indices.get(index: '_all')).to be_empty 11 | end 12 | 13 | describe '.es_field_types' do 14 | it 'returns the given fields with their specified type' do 15 | expect(described_class.es_field_types({ 'state' => 'string', land_area: 'string' })) 16 | .to eq("state" => { :type => "string" }, 17 | :land_area => { :type => "string" }) 18 | end 19 | 20 | context 'with custom type "literal"' do 21 | it 'returns string type with :index of "not_analyzed"' do 22 | expect(described_class.es_field_types({ 'state' => 'string', 'name' => 'literal' })) 23 | .to eq({"state"=>{:type=>"string"}, "name"=>{:type=>"string", :index=>"not_analyzed"}}) 24 | end 25 | end 26 | 27 | end 28 | 29 | end 30 | -------------------------------------------------------------------------------- /NOTES.md: -------------------------------------------------------------------------------- 1 | 2 | ## Data 3 | 4 | Details about the data are specified by DATA_PATH/data.yaml. 5 | Where DATA_PATH is an environment variable, which may be: 6 | 7 | * `s3://username:password@bucket_name/path` 8 | * `s3://bucket_name/path` 9 | * `s3://bucket_name` 10 | * a local path like: `./data` 11 | 12 | 13 | This file is loaded the first time it is needed and then stored in memory. 
The contents of `data.yaml` are stored as JSON in Elasticsearch in a single document of type `config` with id `1`. 14 | 15 | The version field of this document is checked at startup. If the new config has a new version, then we delete the whole index and re-index all of the files referred to in the `data.yaml` files section. 16 | 17 | If no data.yml or data.yaml file is found, then all CSV files in `DATA_PATH` will be loaded, and all fields in their headers will be used. 18 | 19 | ## Debugging 20 | 21 | `ES_DEBUG` environment variable will turn on verbose tracer in the Elasticsearch client 22 | 23 | optional performance profiling for rake import: `rake import[profile=true]` 24 | -------------------------------------------------------------------------------- /spec/fixtures/nested2/school2013.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59 12 | 
-------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | ENV['DATA_PATH'] = nil 2 | ENV['RACK_ENV'] ||= 'test' 3 | RACK_ENV = ENV['RACK_ENV'] unless defined?(RACK_ENV) 4 | 5 | #require File.expand_path(File.dirname(__FILE__) + "/../config/boot") 6 | require_relative '../config/env.rb' 7 | Dir[File.expand_path(File.dirname(__FILE__) + "/../app/helpers/**/*.rb")].each(&method(:require)) 8 | 9 | RSpec.configure do |config| 10 | config.include Rack::Test::Methods 11 | 12 | config.before(:type => :feature) do 13 | # load the Padrino web app defined in app/app.rb 14 | require_relative '../config/boot' 15 | end 16 | config.before do 17 | ENV['DATA_PATH'] = nil 18 | end 19 | end 20 | 21 | # You can use this method to custom specify a Rack app 22 | # you want rack-test to invoke: 23 | # 24 | # app OpenDataMaker::App 25 | # app OpenDataMaker::App.tap { |a| } 26 | # app(OpenDataMaker::App) do 27 | # set :foo, :bar 28 | # end 29 | # 30 | def app(app = nil, &blk) 31 | @app ||= block_given? ? 
app.instance_eval(&blk) : app 32 | @app ||= Padrino.application 33 | end 34 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/school2013.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens Institute,1057,49203,0.06 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59 12 | -------------------------------------------------------------------------------- /lib/zipcode/zipcode.rb: -------------------------------------------------------------------------------- 1 | # Zipcode latitude and longitude data in us_zipcodes.txt 2 | # provided by [GeoNames](http://www.geonames.org/) 3 | # under under a Creative Commons Attribution 3.0 License: 4 | # http://creativecommons.org/licenses/by/3.0/ 5 | 6 | # this code is in public domain (CC0 1.0) 7 | # https://github.com/18F/open-data-maker/blob/dev/LICENSE.md 8 | 9 | require 'csv' 10 | 11 | class Zipcode 12 | @@zipcode_hash = nil 13 | 14 | def Zipcode.latlon(zipcode) 15 | zipcode = 
zipcode.to_s 16 | @@zipcode_hash ||= converted_zipcodes 17 | @@zipcode_hash[zipcode] 18 | end 19 | 20 | def Zipcode.valid?(zipcode) 21 | !!self.latlon(zipcode) 22 | end 23 | 24 | private 25 | def self.converted_zipcodes 26 | parsed_file = CSV.read(File.expand_path("../us_zipcodes.txt", __FILE__), { :col_sep => "\t" }) 27 | zipcode_hash = {} 28 | parsed_file.each do |row| 29 | zipcode = row[1] 30 | lat = row[9].to_f 31 | lon = row[10].to_f 32 | zipcode_hash[zipcode] = {'lat': lat, 'lon': lon} 33 | end 34 | zipcode_hash 35 | end 36 | 37 | end 38 | -------------------------------------------------------------------------------- /script/s3config.rb: -------------------------------------------------------------------------------- 1 | # configure S3 with local credentials based on environment 2 | # usage (from ruby script or irb): 3 | # require 's3config.rb' 4 | # @s3 = ::Aws::S3::Client.new 5 | 6 | require 'dotenv' 7 | 8 | branch = `echo $(git symbolic-ref --short HEAD)`.chomp 9 | 10 | if ENV['APP_ENV'] 11 | APP_ENV = ENV['APP_ENV'] 12 | puts "using APP_ENV from environment #{APP_ENV}" 13 | else 14 | case branch 15 | when "master" 16 | APP_ENV = "production" 17 | when "staging" 18 | APP_ENV = "staging" 19 | else 20 | puts "not on master or staging branch lets use dev" 21 | APP_ENV = "dev" 22 | end 23 | end 24 | 25 | Dotenv.load( 26 | File.expand_path("../../.#{APP_ENV}.env", __FILE__), 27 | File.expand_path("../../.env", __FILE__)) 28 | 29 | require 'aws-sdk' 30 | puts "app env: #{APP_ENV}" 31 | puts "bucket name: #{ENV['s3_bucket']}" 32 | 33 | 34 | s3cred = {'access_key'=> ENV['s3_access_key'], 'secret_key' => ENV['s3_secret_key']} 35 | 36 | ::Aws.config[:credentials] = ::Aws::Credentials.new(s3cred['access_key'], s3cred['secret_key']) 37 | ::Aws.config[:region] = 'us-east-1' 38 | -------------------------------------------------------------------------------- /spec/fixtures/data.rb: -------------------------------------------------------------------------------- 1 | # 
Ages adjusted for Springfield residents to average to 42 2 | # Heights randomly set to generate a max of 142 3 | def address_data 4 | @address_data ||= StringIO.new <<-eos 5 | name,address,city,age,height 6 | Paul,15 Penny Lane,Liverpool,10,142 7 | Michelle,600 Pennsylvania Avenue,Washington,12,1 8 | Marilyn,1313 Mockingbird Lane,Springfield,14,2 9 | Sherlock,221B Baker Street,London,16,123 10 | Clark,66 Lois Lane,Smallville,18,141 11 | Bart,742 Evergreen Terrace,Springfield,70,142 12 | Paul,19 N Square,Boston,70,55.2 13 | Peter,66 Parker Lane,New York,74,11.5123 14 | eos 15 | @address_data.rewind 16 | @address_data 17 | end 18 | 19 | def geo_data 20 | @geo_data ||= StringIO.new <<-eos 21 | state,city,lat,lon 22 | CA,San Francisco,37.727239,-123.032229 23 | NY,"New York",40.664274,-73.938500 24 | CA,"Los Angeles",34.019394,-118.410825 25 | IL,Chicago,41.837551,-87.681844 26 | TX,Houston,29.780472,-95.386342 27 | PA,Philadelphia,40.009376,-75.133346 28 | CA,"San Jose",37.296867,-121.819306 29 | MA,Boston,42.331960,-71.020173 30 | WA,Seattle,47.620499,-122.350876 31 | eos 32 | @geo_data.rewind 33 | @geo_data 34 | end 35 | -------------------------------------------------------------------------------- /spec/fixtures/nested_files/school-data.csv: -------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,earn_2002_p10,gt_25k_2006_p6 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Reichert University,1195,26318,0.53 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Montgomery School,770,6785,0.61 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Indigo Card Community College,526,16767,0.50 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Warm Meadow School of Fine Art,457,1836,0.09 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Kovacek Institute of Technology,1511,19372,0.82 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Athens 
Institute,1057,49203,0.06 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Alabama Beauty College of Auburn University,486,44097,0.50 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Condemned Balloon Institute,616,59759,0.59 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Inquisitive Farm College,971,34183,0.19 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Enterprise University,920,42629,0.59 12 | 11,Montgomery,NULL,1,36117,5,32.3643,-86.2957,Auburn University at Montgomery,940,49879,0.64 13 | -------------------------------------------------------------------------------- /spec/lib/expression/eval_spec.rb: -------------------------------------------------------------------------------- 1 | require 'expression/parser' 2 | require 'expression/eval' 3 | 4 | describe Expression::Eval do 5 | 6 | let(:parser) { Expression::Parser.new } 7 | let(:eval) { Expression::Eval.new } 8 | let(:values) {{ 'f' => 0, 't' => 1 }} 9 | 10 | it "simple 'or'" do 11 | expect( 12 | eval.apply(parser.parse('t or f'), variables: values) 13 | ).to eq(1) 14 | end 15 | 16 | describe "simple 'and'" do 17 | it "true and false" do 18 | expect( 19 | eval.apply(parser.parse('t and f'), variables: values) 20 | ).to eq(0) 21 | end 22 | 23 | it "false and true" do 24 | expect( 25 | eval.apply(parser.parse('f and t'), variables: values) 26 | ).to eq(0) 27 | end 28 | end 29 | 30 | it "multiple operands" do 31 | expect( 32 | eval.apply(parser.parse('f or f or t'), variables: values) 33 | ).to eq(1) 34 | end 35 | 36 | describe "parens" do 37 | it "nested 'or'" do 38 | expect( 39 | eval.apply(parser.parse('(f or t) and t'), variables: values) 40 | ).to eq(1) 41 | end 42 | 43 | it "nested 'and'" do 44 | expect( 45 | eval.apply(parser.parse('(f and t) or f'), variables: values) 46 | ).to eq(0) 47 | end 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /spec/fixtures/schools/schools.csv: 
-------------------------------------------------------------------------------- 1 | UNITID,CITY_MAIN,STABBR_MAIN,ST_FIPS_MAIN,ZIP_MAIN,REGION_MAIN,LATITUDE_MAIN,LONGITUDE_MAIN,INSTNM,SAT_AVG,PREDDEG,UGDS,MENONLY,WOMENONLY,C150_4_POOLED_SUPP,C150_L4_POOLED_SUPP,earn_2002_p10,gt_25k_2006_p6 2 | 1,Normal,AL,1,35762,5,34.7834,-86.5685,Indigo Peak School,639,1,183504,0,0,NULL,0.16,3800,0.61 3 | 2,Montgomery,AL,1,36109-3378,5,32.3842,-86.2164,Warm Thread Beauty College,1218,3,210739,0,0,0.62,NULL,13566,0.10 4 | 3,Montevallo,AL,1,35115-6000,5,33.1063,-86.8651,Arrogant Abyss University,613,1,116967,0,0,NULL,0,1177,0.84 5 | 4,Montgomery,AL,1,36104-0271,5,32.3643,-86.2957,Greasy Marsh Institute,590,1,81254,0,1,NULL,NULL,54146,0.49 6 | 5,Alexander City,AL,1,35010,5,32.9244,-85.9465,Harber Institute of Technology,1355,1,256538,1,0,0,0.91,38553,0.32 7 | 6,Athens,AL,1,35611,5,34.8056,-86.9651,Unsightly Mountain School of Fine Art,1201,1,139899,0,0,NULL,0.87,55899,0.95 8 | 7,Auburn University,AL,1,36849,5,32.6002,-85.4924,Auburn University College,740,3,165974,0,0,0.21,NULL,51608,0.73 9 | 8,Birmingham,AL,1,35254,5,33.5155,-86.8536,Birmingham School,1084,1,224554,0,0,NULL,0.70,29545,0.67 10 | 9,Tanner,AL,1,35671,5,34.6543,-86.9491,Conn Institute of Technology,1171,4,87710,0,0,NULL,0.56,58307,0.63 11 | 10,Enterprise,AL,1,36330-1300,5,31.2975,-85.837,Hollow Resonance Institute,1058,2,97265,0,0,NULL,0.59,17880,0.36 12 | -------------------------------------------------------------------------------- /spec/fixtures/schools/data.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: Aug6-2015-08-10-23:48-0600 3 | api: fakeschool 4 | index: fakeschool-data 5 | unique: 6 | - id 7 | options: 8 | limit_files: 1 9 | limit_rows: 100 10 | 11 | dictionary: 12 | id: 13 | source: UNITID 14 | type: integer 15 | description: Unit ID for institution 16 | school.name: 17 | source: INSTNM 18 | description: Institution name 19 | school.city: 20 | 
source: CITY_MAIN 21 | description: City 22 | school.state: 23 | source: STABBR_MAIN 24 | description: State postcode 25 | school.zip: 26 | source: ZIP_MAIN 27 | type: integer 28 | description: ZIP code 29 | completion.rate.lt_four_year: 30 | source: C150_L4_POOLED_SUPP 31 | type: float 32 | description: 150% completion rate for less-than-four-year institutions, pooled in two-year rolling averages and suppressed for small n size 33 | completion.rate.four_year: 34 | source: C150_4_POOLED_SUPP 35 | type: float 36 | description: 150% completion rate for four-year institutions, pooled in two-year rolling averages and suppressed for small n size 37 | completion.rate.overall: 38 | calculate: C150_L4_POOLED_SUPP or C150_4_POOLED_SUPP 39 | type: float 40 | description: 150% completion rate for the institution, independent of degree 41 | 42 | files: 43 | - name: schools.csv 44 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | ruby '2.2.4' 3 | 4 | # Distribute your app as a gem 5 | # gemspec 6 | 7 | # Server requirements 8 | # gem 'thin' # or mongrel 9 | # gem 'trinidad', :platform => 'jruby' 10 | 11 | # Optional JSON codec (faster performance) 12 | # gem 'oj' 13 | 14 | # open-data-maker requirements 15 | gem 'elasticsearch' 16 | gem 'stretchy' 17 | gem 'hashie' 18 | gem 'cf-app-utils' 19 | #gem 'unicorn' 20 | gem 'puma' 21 | gem 'safe_yaml' 22 | gem 'aws-sdk', '~> 2' 23 | gem 'actionview' 24 | gem 'dotenv' 25 | gem 'oj' 26 | gem 'parslet' 27 | gem 'parallel' 28 | 29 | # Project requirements 30 | gem 'rake' 31 | 32 | # Component requirements 33 | gem 'sass' 34 | gem 'liquify' 35 | gem 'liquid', '= 3.0.3' 36 | gem 'erubis' 37 | 38 | # Test requirements 39 | group :test do 40 | gem 'rspec' 41 | gem 'rspec-mocks' 42 | gem 'rack-test', :require => 'rack/test' 43 | end 44 | 45 | group 'dev' do 46 | gem 'google_drive' 47 
| gem 'ruby-prof' 48 | 49 | end 50 | # Padrino Stable Gem 51 | gem 'padrino', '0.12.5' 52 | 53 | gem 'pry', :group => ['development', 'test'] 54 | gem 'pry-byebug', :group => ['development', 'test'] 55 | gem 'newrelic_rpm' 56 | 57 | # Or Padrino Edge 58 | # gem 'padrino', :github => 'padrino/padrino-framework' 59 | 60 | # Or Individual Gems 61 | # %w(core support gen helpers cache mailer admin).each do |g| 62 | # gem 'padrino-' + g, '0.12.5' 63 | # end 64 | -------------------------------------------------------------------------------- /lib/data_magic/index/output.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class Output 4 | attr_reader :row_count, :headers, :skipped 5 | 6 | def initialize 7 | @row_count = 0 8 | @skipped = [] 9 | end 10 | 11 | def set_headers(doc) 12 | return if headers 13 | @headers = doc.headers 14 | end 15 | 16 | def skipping(id) 17 | skipped << id 18 | end 19 | 20 | def increment(count = 1) 21 | @row_count += count 22 | end 23 | 24 | def validate! 25 | raise DataMagic::InvalidData, "zero rows" if empty? 26 | end 27 | 28 | def empty? 29 | row_count == 0 30 | end 31 | 32 | def log(doc) 33 | log_0(doc) if empty? 34 | log_marker if row_count % 500 == 0 35 | end 36 | 37 | def log_skips 38 | return if skipped.empty? 39 | logger.info "skipped (missing parent id): #{skipped.join(',')}" 40 | end 41 | 42 | def log_limit 43 | logger.info "done now, limiting rows to #{row_count}" 44 | end 45 | 46 | private 47 | 48 | def log_0(document) 49 | logger.debug "csv parsed" 50 | logger.info "row#{row_count} -> #{document.preview}" 51 | end 52 | 53 | def log_marker 54 | logger.info "indexing rows: #{row_count}..." 
55 | end 56 | end 57 | end 58 | end 59 | -------------------------------------------------------------------------------- /spec/features/web_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'app', type: 'feature' do 4 | before do 5 | DataMagic.destroy 6 | ENV['DATA_PATH'] = './spec/fixtures/sample-data' 7 | DataMagic.init(load_now: true) 8 | end 9 | 10 | after do 11 | DataMagic.destroy 12 | end 13 | 14 | it "should load the home page" do 15 | get '/' 16 | expect(last_response).to be_ok 17 | end 18 | 19 | it "should display links to endpoints" do 20 | get '/' 21 | expect(last_response.body).to include 'cities' 22 | end 23 | 24 | it "should display a list of categories" do 25 | get '/' 26 | expect(last_response.body).to include('Browse Data Details by Category') 27 | expect(last_response.body).to include('General') # category name 28 | expect(last_response.body).to include('general information about the city, including standard identifiers') 29 | end 30 | 31 | it "should load the correct category page" do 32 | get '/category/general' 33 | expect(last_response.body).to include('Data Details for the') 34 | expect(last_response.body).to include('category_entry = {"title":"General"') 35 | expect(last_response.body).to include('population') # a field name 36 | expect(last_response.body).to include('The name of the city') # a field description 37 | expect(last_response.body).to include('literal') # field type 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /script/bootstrap: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | fancy_echo() { 6 | local fmt="$1"; shift 7 | 8 | # shellcheck disable=SC2059 9 | printf "\n$fmt\n" "$@" 10 | } 11 | 12 | brew_install_or_upgrade() { 13 | if brew_is_installed "$1"; then 14 | if brew_is_upgradable "$1"; then 15 | fancy_echo 
"Upgrading %s ..." "$1" 16 | brew upgrade "$@" 17 | else 18 | fancy_echo "Already using the latest version of %s. Skipping ..." "$1" 19 | fi 20 | else 21 | fancy_echo "Installing %s ..." "$1" 22 | brew install "$@" 23 | fi 24 | } 25 | 26 | brew_is_installed() { 27 | brew list -1 | grep -Fqx "$1" 28 | } 29 | 30 | brew_is_upgradable() { 31 | ! brew outdated --quiet "$1" >/dev/null 32 | } 33 | 34 | brew_tap_is_installed() { 35 | brew tap | grep -Fqx "$1" 36 | } 37 | 38 | brew_tap() { 39 | if ! brew_tap_is_installed "$1"; then 40 | fancy_echo "Tapping $1..." 41 | brew tap "$1" 2> /dev/null 42 | fi 43 | } 44 | 45 | echo 'Installing dependencies...' 46 | 47 | if command -v brew >/dev/null; then 48 | brew update 49 | 50 | brew_tap 'homebrew/services' 51 | brew_tap 'homebrew/versions' 52 | brew_install_or_upgrade 'elasticsearch17' 53 | 54 | brew services restart elasticsearch17 55 | 56 | # elasticsearch takes several seconds to load 57 | sleep 10 58 | fi 59 | 60 | gem install bundler --conservative 61 | bundle check || bundle install 62 | 63 | echo "All done!" 64 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | As a work of the United States Government, this project is in the 2 | public domain within the United States. 3 | 4 | Additionally, we waive copyright and related rights in the work 5 | worldwide through the CC0 1.0 Universal public domain dedication. 6 | 7 | ## CC0 1.0 Universal Summary 8 | 9 | This is a human-readable summary of the 10 | [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode). 11 | 12 | ### No Copyright 13 | 14 | The person who associated a work with this deed has dedicated the work to 15 | the public domain by waiving all of his or her rights to the work worldwide 16 | under copyright law, including all related and neighboring rights, to the 17 | extent allowed by law. 
18 | 19 | You can copy, modify, distribute and perform the work, even for commercial 20 | purposes, all without asking permission. 21 | 22 | ### Other Information 23 | 24 | In no way are the patent or trademark rights of any person affected by CC0, 25 | nor are the rights that other persons may have in the work or in how the 26 | work is used, such as publicity or privacy rights. 27 | 28 | Unless expressly stated otherwise, the person who associated a work with 29 | this deed makes no warranties about the work, and disclaims liability for 30 | all uses of the work, to the fullest extent permitted by applicable law. 31 | When using or citing the work, you should not imply endorsement by the 32 | author or the affirmer. 33 | 34 | -------------------------------------------------------------------------------- /DICTIONARY.md: -------------------------------------------------------------------------------- 1 | # Dictionary Format 2 | 3 | The data dictionary format may be (optionally) specified in the `data.yaml` file. If unspecified, all columns are imported as strings. 4 | 5 | ## Simple Data Types 6 | 7 | ``` 8 | dictionary: 9 | name: 10 | source: COLUMN_NAME 11 | type: integer 12 | description: explanation of where this data comes from and its meaning 13 | ``` 14 | 15 | In the above example: 16 | * `source:` is the name of the column in the csv. (This doesn't have to be all caps, we just find that to be common in government datasets.) 17 | * `type:` may be `integer`, `float`, `string` 18 | * `description:` text description suitable for developer documentation or information provided to data analysts 19 | 20 | ## Calculated columns 21 | 22 | Optionally, you can add "columns" by calculating fields at import based on multiple csv columns. 23 | 24 | ``` 25 | academics.program.degree.health: 26 | calculate: CIP51ASSOC or CIP51BACHL 27 | type: integer 28 | description: Associate or Bachelor's degree in Health 29 | ``` 30 | 31 | Multiple operations are supported. 
In the following example, if the columns `apples`, `oranges` and `plums` had a `0` value when there were none, and a `1` to represent if they were available, then these values could be combined with `or` to create a data field representing if any were true. 32 | 33 | ``` 34 | fruit: 35 | calculate: apples or oranges or plums 36 | type: integer 37 | description: is there any fruit available? 38 | ``` 39 | -------------------------------------------------------------------------------- /spec/lib/data_magic/name_type_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic name types" do 5 | 6 | before :example do 7 | DataMagic.destroy 8 | ENV['DATA_PATH'] = './spec/fixtures/types' 9 | DataMagic.init(load_now: true) 10 | end 11 | after :example do 12 | DataMagic.destroy 13 | end 14 | 15 | it "can search for one word" do 16 | response = DataMagic.search({'city.name' => 'New'}, fields:['city.name']) 17 | results = response['results'].sort {|a,b| a['city.name'] <=> b['city.name']} 18 | expect(results).to eq( 19 | [{"city.name"=>"New Orleans"}, {"city.name"=>"New York"}]) 20 | end 21 | 22 | it "can search for multiple words" do 23 | response = DataMagic.search({'city.name' => 'New York'}, fields:['city.name']) 24 | results = response['results'] 25 | expect(results).to eq( 26 | [{"city.name"=>"New York"}]) 27 | end 28 | 29 | it "can search for partial words" do 30 | response = DataMagic.search({'city.name' => 'S Fran'}, fields:['city.name']) 31 | results = response['results'] 32 | expect(results).to eq( 33 | [{"city.name"=>"San Francisco"}]) 34 | end 35 | 36 | it "is not case sensitive" do 37 | response = DataMagic.search({'city.name' => 'nEW'}, fields:['city.name']) 38 | results = response['results'].sort {|a,b| a['city.name'] <=> b['city.name']} 39 | expect(results).to eq( 40 | [{"city.name"=>"New Orleans"}, {"city.name"=>"New York"}]) 41 | end 42 | end 43 | 
-------------------------------------------------------------------------------- /spec/lib/expression/parser_spec.rb: -------------------------------------------------------------------------------- 1 | require 'expression/parser' 2 | 3 | describe Expression::Parser do 4 | 5 | let(:parser) { Expression::Parser.new } 6 | describe 'vars' do 7 | it "parses one" do 8 | expect(parser.parse('one')).to eq(var: 'one') 9 | end 10 | it "preserves case " do 11 | expect(parser.parse('ONe')).to eq(var: 'ONe') 12 | end 13 | it "consumes trailing white space" do 14 | expect(parser.parse('one ')).to eq(var: 'one') 15 | end 16 | end 17 | 18 | it "parses or expression" do 19 | expect(parser.parse('apples or oranges')).to eq( 20 | {or: {left: {var: "apples"}, right: {var: "oranges"}}} 21 | ) 22 | end 23 | 24 | it "parses and expression" do 25 | expect(parser.parse('apples and oranges')).to eq( 26 | {and: {left: {var: "apples"}, right: {var: "oranges"}}} 27 | ) 28 | end 29 | 30 | describe "parens" do 31 | it "nested 'or'" do 32 | expect(parser.parse('(apples or cranberries) and nuts')).to eq( 33 | {:and => { 34 | :left=>{:or=>{:left=>{:var=>"apples"}, :right=>{:var=>"cranberries"}}}, 35 | :right=>{:var=>"nuts"}}} 36 | ) 37 | end 38 | it "nested 'and'" do 39 | expect(parser.parse('(nuts and cranberries) or apples')).to eq( 40 | { or: { 41 | left: { and: { left: {var: "nuts"}, right: {var:"cranberries"}}}, 42 | right: { var: "apples" } 43 | } 44 | } 45 | ) 46 | end 47 | 48 | end 49 | 50 | end 51 | -------------------------------------------------------------------------------- /lib/expression/parser.rb: -------------------------------------------------------------------------------- 1 | require 'parslet' 2 | # based on https://github.com/kschiess/parslet/blob/master/example/boolean_algebra.rb 3 | # usage: 4 | # def parse(str) 5 | # ExpressionParser.new.parse(str) 6 | # 7 | # rescue Parslet::ParseFailed => failure 8 | # puts failure.cause.ascii_tree 9 | # end 10 | # 11 | # tree = 
ExpressionParser.new.parse("one or two") 12 | # => {:or=>{:left=>{:var=>"one"@0}, :right=>{:var=>"two"@7}}} 13 | # Eval.new.apply(tree, variables: {"one"=>1, "two"=>2}) 14 | # 15 | # Variables.new.apply(tree) 16 | 17 | class Expression 18 | class Parser < Parslet::Parser 19 | rule(:space) { match[" "].repeat(1) } 20 | rule(:space?) { space.maybe } 21 | 22 | rule(:lparen) { str("(") >> space? } 23 | rule(:rparen) { str(")") >> space? } 24 | 25 | rule(:and_operator) { str("and") >> space? } 26 | rule(:or_operator) { str("or") >> space? } 27 | 28 | rule(:var) { match["[^\s\(\)]"].repeat(1).as(:var) >> space? } 29 | 30 | # The primary rule deals with parentheses. 31 | rule(:primary) { lparen >> or_operation >> rparen | var } 32 | 33 | # Note that following rules are both right-recursive. 34 | rule(:and_operation) { 35 | (primary.as(:left) >> and_operator >> 36 | and_operation.as(:right)).as(:and) | 37 | primary } 38 | 39 | rule(:or_operation) { 40 | (and_operation.as(:left) >> or_operator >> 41 | or_operation.as(:right)).as(:or) | 42 | and_operation } 43 | 44 | # We start at the lowest precedence rule. 
45 | root(:or_operation) 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /spec/lib/data_magic/index/event_logger_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe DataMagic::Index::EventLogger do 5 | let(:event_logger) { 6 | l = DataMagic::Index::EventLogger.new 7 | allow(l).to receive(:logger).and_return(logger) 8 | l 9 | } 10 | 11 | let(:logger) { double('logger') } 12 | 13 | context 'when triggering an event with only a message argument' do 14 | it 'logs the message with the right level' do 15 | expect(logger).to receive(:info).with('hey!') 16 | event_logger.trigger('info', 'hey!') 17 | 18 | expect(logger).to receive(:debug).with('what happened?') 19 | event_logger.trigger('debug', 'what happened?') 20 | 21 | expect(logger).to receive(:warn).with('dude? everything ok?') 22 | event_logger.trigger('warn', 'dude? everything ok?') 23 | 24 | expect(logger).to receive(:error).with('FIRE IN THE HOLE!') 25 | event_logger.trigger('error', 'FIRE IN THE HOLE!') 26 | end 27 | end 28 | 29 | context 'when triggering an event with a message and an object' do 30 | it 'logs as a key value pair with an inspection of the object' do 31 | expect(logger).to receive(:info).with("foo: {:wild=>\"bar\"}") 32 | event_logger.trigger('info', 'foo', {wild: 'bar'}) 33 | end 34 | 35 | it 'will shorten the object inspection when provided a limit' do 36 | expect(logger).to receive(:warn).with("foo: {:wild") 37 | event_logger.trigger('warn', 'foo', {wild: 'bar'}, 5) 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /lib/data_magic/index/repository.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module Index 3 | class Repository 4 | attr_reader :client, :document 5 | 6 | def initialize(client, document) 7 | 
@client = client 8 | @document = document 9 | end 10 | 11 | def save 12 | @skipped = false 13 | if client.creating? 14 | create 15 | else 16 | update 17 | end 18 | end 19 | 20 | def skipped? 21 | @skipped 22 | end 23 | 24 | def save 25 | if client.creating? 26 | create 27 | else 28 | update 29 | end 30 | end 31 | 32 | private 33 | 34 | def update 35 | if client.allow_skips? 36 | update_with_rescue 37 | else 38 | update_without_rescue 39 | end 40 | end 41 | 42 | def create 43 | client.index({ 44 | index: client.index_name, 45 | id: document.id, 46 | type: 'document', 47 | body: document.data 48 | }) 49 | end 50 | 51 | def update_without_rescue 52 | client.update({ 53 | index: client.index_name, 54 | id: document.id, 55 | type: 'document', 56 | body: {doc: document.data} 57 | }) 58 | end 59 | 60 | def update_with_rescue 61 | update_without_rescue 62 | rescue Elasticsearch::Transport::Transport::Errors::NotFound 63 | @skipped = true 64 | end 65 | end 66 | end 67 | end 68 | -------------------------------------------------------------------------------- /lib/nested_hash.rb: -------------------------------------------------------------------------------- 1 | class NestedHash < Hash 2 | 3 | def initialize(hash = {}, default = nil, &block) 4 | default ? super(default) : super(&block) 5 | self.add(hash) 6 | end 7 | 8 | def add(hash) 9 | hash.each do |full_name, value| 10 | parts = full_name.to_s.split('.') 11 | last = parts.length - 1 12 | add_to = self 13 | parts.each_with_index do |name, index| 14 | if index == last 15 | add_to[name] = value 16 | else 17 | add_to[name] ||= {} 18 | add_to = add_to[name] 19 | end 20 | end 21 | end 22 | self 23 | end 24 | 25 | # generate a flat, non-nested hash 26 | # with keys that have dots representing the hierarchy 27 | def withdotkeys(deep_hash = self, flat_hash = {}, root = '') 28 | deep_hash.each do |key, value| 29 | if deep_hash[key].is_a?(Hash) 30 | flat_hash.merge! 
withdotkeys(value, flat_hash, key + '.') 31 | else 32 | key = "#{root}#{key}" if not root.empty? 33 | flat_hash[key] = value 34 | end 35 | end 36 | flat_hash 37 | end 38 | 39 | # generate a list of the keys with dots representing the hierarchy 40 | def dotkeys(row = self, prefix = '', path = []) 41 | human_names = [] 42 | paths = [] 43 | row.keys.each do |key| 44 | if row[key].is_a?(Hash) 45 | new_human_names = dotkeys(row[key], key + '.') 46 | human_names += new_human_names 47 | else 48 | human_names << prefix + key 49 | end 50 | end 51 | human_names 52 | end 53 | 54 | end 55 | -------------------------------------------------------------------------------- /app/views/home.liquid: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 |
5 |

{{ title }}

6 |
7 |
8 |
9 |
10 | 11 |

API endpoints

12 | 13 | 18 | 19 | {% if examples.size > 0 %} 20 |

Examples

21 | 22 | 27 | {% endif %} 28 | 29 |

Browse Data Details by Category

30 |
31 |
32 |
33 | 34 | 53 | -------------------------------------------------------------------------------- /spec/lib/data_magic/calculated_columns_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "calculated columns" do 5 | 6 | before :example do 7 | DataMagic.destroy 8 | ENV['DATA_PATH'] = data_path 9 | end 10 | after :example do 11 | DataMagic.destroy 12 | end 13 | 14 | describe "combine into float" do 15 | let(:data_path) { "./spec/fixtures/schools" } 16 | it "can combine two columns" do 17 | DataMagic.config = DataMagic::Config.new 18 | DataMagic.import_with_dictionary 19 | result = DataMagic.search({}, fields: ['id', 'completion.rate.overall']) 20 | results = result['results'].sort_by { |hash| hash['id'] } 21 | expect(results[0]).to eq('id' => 1, 'completion.rate.overall' => 0.16) 22 | expect(results[1]).to eq('id' => 2, 'completion.rate.overall' => 0.62) 23 | expect(results[2]).to eq('id' => 3, 'completion.rate.overall' => nil) 24 | expect(results[3]).to eq('id' => 4, 'completion.rate.overall' => nil) 25 | expect(results[4]).to eq('id' => 5, 'completion.rate.overall' => 0.91) 26 | end 27 | end 28 | 29 | describe "combine into boolean" do 30 | let(:data_path) { "./spec/fixtures/calculated_columns" } 31 | it "can combine multiple columns" do 32 | DataMagic.config = DataMagic::Config.new 33 | DataMagic.import_with_dictionary 34 | result = DataMagic.search({}, fields: %w(id summarybool)) 35 | results = result['results'].sort_by { |hash| hash['id'] } 36 | expect(results[0]).to eq('id' => 1, 'summarybool' => true) 37 | expect(results[1]).to eq('id' => 2, 'summarybool' => false) 38 | expect(results[2]).to eq('id' => 3, 'summarybool' => true) 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /spec/lib/expression_spec.rb: -------------------------------------------------------------------------------- 1 | 
require 'expression/expression' 2 | 3 | describe Expression do 4 | context "simple or expression" do 5 | it "can find variables" do 6 | expr = "ONE or TWO" 7 | expect(Expression.new(expr).variables).to eq(%w(ONE TWO)) 8 | end 9 | 10 | it "evaluates: 0 OR 1 to be 1" do 11 | expr = "f or t" 12 | values = {f:0, t:1} 13 | expect(Expression.new(expr).evaluate(values)).to eq(1) 14 | end 15 | 16 | it "evaluates: 1 OR 0 to be 1" do 17 | expr = "t or f" 18 | values = {f:0, t:1} 19 | expect(Expression.new(expr).evaluate(values)).to eq(1) 20 | end 21 | 22 | it "evaluates: 0 OR 0 to be 0" do 23 | expr = "f1 or f2" 24 | values = {f1:0, f2:0} 25 | expect(Expression.new(expr).evaluate(values)).to eq(0) 26 | end 27 | 28 | it "evaluates: 1 OR 1 to be 1" do 29 | expr = "t1 or t2" 30 | values = {t1:1, t2:1} 31 | expect(Expression.new(expr).evaluate(values)).to eq(1) 32 | end 33 | 34 | it "evaluates: 1 OR nil to be 1" do 35 | expr = "t1 or t2" 36 | values = {t1:1, t2:nil} 37 | expect(Expression.new(expr).evaluate(values)).to eq(1) 38 | end 39 | 40 | it "evaluates: 0 OR nil to be nil" do 41 | expr = "t1 or t2" 42 | values = {t1:0, t2:nil} 43 | expect(Expression.new(expr).evaluate(values)).to eq(nil) 44 | end 45 | 46 | it "evaluates: nil OR 0 to be 0" do 47 | expr = "t1 or t2" 48 | values = {t1:nil, t2:0} 49 | expect(Expression.new(expr).evaluate(values)).to eq(0) 50 | end 51 | 52 | it "evaluates: nil OR nil to be nil" do 53 | expr = "t1 or t2" 54 | values = {t1:nil, t2:nil} 55 | expect(Expression.new(expr).evaluate(values)).to eq(nil) 56 | end 57 | end 58 | end 59 | -------------------------------------------------------------------------------- /spec/lib/data_magic/create_index_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic #init" do 5 | before (:all) do 6 | ENV['DATA_PATH'] = './spec/fixtures/import_with_dictionary' 7 | end 8 | 9 | after(:each) do 10 | 
DataMagic.destroy 11 | end 12 | 13 | context "with no options" do 14 | it "creates index only once" do 15 | expect(DataMagic).to receive(:create_index).once 16 | DataMagic.init 17 | end 18 | 19 | it "creates index" do 20 | DataMagic.init 21 | expect(DataMagic.config.index_exists?).to be true 22 | end 23 | 24 | it "does not re-create index with subsequent call to #import_with_dictionary" do 25 | expect(DataMagic).to receive(:create_index).once 26 | DataMagic.init 27 | DataMagic.import_with_dictionary 28 | end 29 | end 30 | 31 | 32 | context "with load_now: false" do 33 | it "does not call #create_index" do 34 | expect(DataMagic).not_to receive(:create_index) 35 | DataMagic.init(load_now: false) 36 | end 37 | 38 | it "does not create index" do 39 | DataMagic.init(load_now: false) 40 | expect(DataMagic.config.index_exists?).to be false 41 | end 42 | 43 | it "creates index with subsequent call to #import_with_dictionary" do 44 | DataMagic.init(load_now: false) 45 | DataMagic.import_with_dictionary 46 | expect(DataMagic.config.index_exists?).to be true 47 | end 48 | 49 | it "creates index with subsequent call to #import_csv" do 50 | ENV['DATA_PATH'] = './spec/fixtures/minimal' 51 | DataMagic.init(load_now: false) 52 | data_str = <<-eos 53 | a,b 54 | 1,2 55 | 3,4 56 | eos 57 | data = StringIO.new(data_str) 58 | DataMagic.import_csv(data) 59 | expect(DataMagic.config.index_exists?).to be true 60 | end 61 | end 62 | end -------------------------------------------------------------------------------- /spec/lib/nested_hash_spec.rb: -------------------------------------------------------------------------------- 1 | require 'nested_hash' 2 | 3 | describe NestedHash do 4 | let(:input) { {"loc.x" => 1, "loc.y" => 2, "foo.a" => 10, "foo.b" => 20, "loc.z" => 3}} 5 | let(:expected) {{"loc" => {"x" => 1, "y" => 2, "z" => 3}, "foo" => {"a" => 10, "b" => 20}}} 6 | 7 | let(:symbol_keys) { {x:1, y:2}} 8 | let(:symbol_keys_result) { {'x' => 1, 'y' => 2}} 9 | 10 | 11 | it ".add created 
nested hash elements for string keys with '.'" do 12 | result = NestedHash.new.add(input) 13 | expect(result).to eq(expected) 14 | end 15 | 16 | it "does no harm when initialized with an already nested hash" do 17 | expect(NestedHash.new(expected)).to eq(expected) 18 | end 19 | 20 | context "methods" do 21 | let (:result) { NestedHash.new(input) } 22 | it "can initialize with another Hash" do 23 | expect(result).to eq(expected) 24 | end 25 | 26 | it "can generate dotkeys" do 27 | expect(result.dotkeys.sort).to eq(input.keys.sort) 28 | end 29 | 30 | it "withdotkeys generates keys with '.'" do 31 | expect(result.withdotkeys).to eq(input) 32 | end 33 | 34 | it "dotkeys and withdotkeys have same order" do 35 | expect(result.withdotkeys.keys).to eq(result.dotkeys) 36 | end 37 | end 38 | 39 | 40 | it "turns symbol keys into simple strings" do 41 | result = NestedHash.new.add(symbol_keys) 42 | expect(result).to eq(symbol_keys_result) 43 | end 44 | 45 | context "deeply nested" do 46 | let(:input) { {"info.loc.x" => 0.11, "info.loc.y" => 0.222, "foo.a" => 10, "foo.b" => 20}} 47 | let(:expected) { {"info" => {"loc" => {"x" => 0.11, "y" => 0.222}}, "foo" => {"a" => 10, "b" => 20}}} 48 | 49 | it "creates nested hash elements for string keys with '.'" do 50 | result = NestedHash.new.add(input) 51 | expect(result).to eq(expected) 52 | end 53 | 54 | end 55 | 56 | end 57 | -------------------------------------------------------------------------------- /app/views/category.liquid: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 |
5 |

{{ title }}

6 |
7 |
8 |
9 |
10 | 11 |

Data Details for the Category

12 | 13 |
14 |
15 |
16 |
17 | 18 |
19 | Back to the list of Categories 20 | 21 | 55 | -------------------------------------------------------------------------------- /config/apps.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file mounts each app in the Padrino project to a specified sub-uri. 3 | # You can mount additional applications using any of these commands below: 4 | # 5 | # Padrino.mount('blog').to('/blog') 6 | # Padrino.mount('blog', :app_class => 'BlogApp').to('/blog') 7 | # Padrino.mount('blog', :app_file => 'path/to/blog/app.rb').to('/blog') 8 | # 9 | # You can also map apps to a specified host: 10 | # 11 | # Padrino.mount('Admin').host('admin.example.org') 12 | # Padrino.mount('WebSite').host(/.*\.?example.org/) 13 | # Padrino.mount('Foo').to('/foo').host('bar.example.org') 14 | # 15 | # Note 1: Mounted apps (by default) should be placed into the project root at '/app_name'. 16 | # Note 2: If you use the host matching remember to respect the order of the rules. 17 | # 18 | # By default, this file mounts the primary app which was generated with this project. 19 | # However, the mounted app can be modified as needed: 20 | # 21 | # Padrino.mount('AppName', :app_file => 'path/to/file', :app_class => 'BlogApp').to('/') 22 | # 23 | 24 | ## 25 | # Setup global project settings for your apps. These settings are inherited by every subapp. You can 26 | # override these settings in the subapps as needed. 
27 | # 28 | Padrino.configure_apps do 29 | # enable :sessions 30 | set :session_secret, 'ffb8bfc2d71e2ad938950169de2757ab7b73b1cd5fbf91b4b912ae493dc5b70f' 31 | set :protection, :except => :path_traversal 32 | set :protect_from_csrf, true 33 | 34 | set :allow_origin, :any 35 | 36 | end 37 | 38 | # If needed, mount the app that does indexing 39 | if ENV['INDEX_APP'] == "enable" 40 | puts "mounting index app" 41 | Padrino.mount('OpenDataMaker::IndexApp', :app_file => Padrino.root('app/index_app.rb')).to('/index') 42 | end 43 | 44 | # Mounts the core application for this project 45 | Padrino.mount('OpenDataMaker::App', :app_file => Padrino.root('app/app.rb')).to('/') 46 | -------------------------------------------------------------------------------- /lib/data_magic/index/row_importer.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | module DataMagic 4 | module Index 5 | class RowImporter 6 | attr_reader :row, :importer 7 | 8 | def initialize(row, importer) 9 | @row = row 10 | @importer = importer 11 | end 12 | 13 | def process 14 | log_row_start 15 | before_save 16 | save 17 | after_save 18 | log_row_end 19 | end 20 | 21 | def document 22 | @document ||= DocumentBuilder.create(row, importer.builder_data, config) 23 | end 24 | 25 | def repository 26 | @repository ||= Repository.new(importer.client, document) 27 | end 28 | 29 | private 30 | 31 | def log_row_start 32 | trigger("debug", "csv parsed") if importer.empty? 33 | trigger("info", "row #{importer.row_count}", document, 500) if importer.row_count % 500 == 0 34 | #trigger("info", "id", document.id) 35 | if document.id_empty? 
36 | trigger("warn", "blank id") 37 | trigger("warn", "unique", config.data["unique"]) 38 | trigger("warn", "in row", document, 255) 39 | end 40 | end 41 | 42 | def before_save 43 | importer.set_headers(document) 44 | end 45 | 46 | def save 47 | repository.save 48 | end 49 | 50 | def after_save 51 | importer.skipping(document.id) if repository.skipped? 52 | importer.increment 53 | end 54 | 55 | def log_row_end 56 | return if !importer.at_limit? 57 | trigger("info", "done now, limiting rows to #{importer.row_count}") 58 | end 59 | 60 | def config 61 | DataMagic.config 62 | end 63 | 64 | extend Forwardable 65 | 66 | def_delegators :importer, :trigger 67 | 68 | def self.process(*args) 69 | new(*args).process 70 | end 71 | end 72 | end 73 | end 74 | -------------------------------------------------------------------------------- /spec/lib/data_magic/index/document_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe DataMagic::Index::Document do 5 | before do 6 | allow(DataMagic).to receive(:config).and_return(config) 7 | end 8 | 9 | let(:document) { DataMagic::Index::Document.new(data) } 10 | let(:config) { DataMagic::Config.new() } 11 | let(:data) { {} } 12 | 13 | context 'when configured without any unique keys' do 14 | before do 15 | config.data['unique'] = [] 16 | end 17 | 18 | it 'id should be nil' do 19 | expect(document.id).to be(nil) 20 | end 21 | 22 | it 'id should not be empty though' do 23 | expect(document.id_empty?).to be_falsey 24 | end 25 | end 26 | 27 | context 'when configured with the default keys' do 28 | context 'and there is no data' do 29 | it 'id should be an empty string' do 30 | expect(document.id).to eq('') 31 | end 32 | 33 | it 'id should be considered empty' do 34 | expect(document.id_empty?).to be_truthy 35 | end 36 | end 37 | 38 | context 'when there is data' do 39 | let(:data) { 40 | {"name" => "foo", "state"=>"MA"} 41 | } 42 | 43 | 
it 'id should be the value for the name key' do 44 | expect(document.id).to eq('foo') 45 | end 46 | 47 | it 'id should not be considered empty' do 48 | expect(document.id_empty?).to be_falsey 49 | end 50 | end 51 | end 52 | 53 | context 'with custom id configuration' do 54 | let(:data) { 55 | {"name" => "foo", "state"=>"MA"} 56 | } 57 | 58 | before do 59 | config.data['unique'] = ['name', 'state'] 60 | end 61 | 62 | it 'id should build the right id for the data' do 63 | expect(document.id).to eq('foo:MA') 64 | end 65 | 66 | it 'id should not be considered empty' do 67 | expect(document.id_empty?).to be_falsey 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /spec/lib/data_magic/import_with_nested_files_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "unique key(s)" do 5 | 6 | before :example do 7 | DataMagic.destroy 8 | ENV['DATA_PATH'] = './spec/fixtures/nested_files' 9 | DataMagic.config = DataMagic::Config.new 10 | DataMagic.import_with_dictionary 11 | end 12 | after :example do 13 | DataMagic.destroy 14 | end 15 | let(:query) { {} } 16 | let(:sort) { nil } 17 | let(:result) { DataMagic.search(query, sort: sort) } 18 | let(:first) { result['results'].first } 19 | let(:id_one) { result['results'].find { |item| item['id'] == '1' } } 20 | let(:total) { result['metadata']['total'] } 21 | 22 | it "creates one document per unique id" do 23 | expect(total).to eq(11) 24 | end 25 | 26 | it "nests documents per unique id" do 27 | expect(id_one['id']).to eq('1') 28 | expect(id_one['2013']).to_not be_nil 29 | end 30 | 31 | it "root document contains special 'only' fields" do 32 | expect(id_one['id']).to eq('1') 33 | expect(id_one['name']).to eq('Reichert University') 34 | expect(id_one['city']).to eq('Normal') 35 | expect(id_one['state']).to eq('AL') 36 | end 37 | 38 | context "can import a subset of 
fields" do 39 | context "and when searching for a field value" do 40 | let(:query) { {zipcode: "35762"} } 41 | it "and doesn't find column" do 42 | expect(total).to eq(0) 43 | end 44 | end 45 | it "and doesn't include extra field" do 46 | expect(first['zipcode']).to be(nil) 47 | end 48 | end 49 | 50 | context "when searching on a nested field" do 51 | let(:query) { { '2013.earnings.6_yrs_after_entry.median' => 26318 } } 52 | it "can find the correct results" do 53 | expect(total).to eq(1) 54 | expect(first['2013']['earnings']['6_yrs_after_entry']).to eq({"percent_gt_25k"=>0.53, "median"=>26318}) 55 | end 56 | end 57 | 58 | context "when sorting by a nested field" do 59 | let(:sort) { '2013.earnings.6_yrs_after_entry.median' } 60 | it "can find the right first result" do 61 | expect(total).to eq(11) 62 | expect(first['2013']['earnings']['6_yrs_after_entry']).to eq({"percent_gt_25k"=>0.09, "median"=>1836}) 63 | end 64 | end 65 | end 66 | -------------------------------------------------------------------------------- /app/stylesheets/application.sass: -------------------------------------------------------------------------------- 1 | body 2 | -webkit-font-smoothing: antialiased 3 | font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif 4 | font-size: 1em 5 | line-height: 1.5 6 | color: #333 7 | 8 | h1, h2, h3, h4, h5, h6 9 | font-family: "Raleway", "Helvetica Neue", Helvetica, Arial, sans-serif 10 | line-height: 1.1em 11 | margin: 0 12 | text-rendering: optimizeLegibility 13 | 14 | p 15 | margin: 0 0 0.75em 16 | 17 | hr 18 | border-bottom: 1px solid silver 19 | border-left: none 20 | border-right: none 21 | border-top: none 22 | margin: 1em 0 23 | 24 | img 25 | -webkit-user-select: none 26 | cursor: zoom-in 27 | margin: 0 28 | max-width: 50% 29 | 30 | .logo 31 | height: 150px 32 | width: 150px 33 | top: 50px 34 | left: 50px 35 | z-index: 20 36 | 37 | @media screen and (max-width: 995px) 38 | .logo 39 | height: 100px 40 | width: 100px 41 | top: 40px 
42 | left: 20px 43 | 44 | @media screen and (max-width: 785px) 45 | .logo 46 | height: 75px 47 | width: 75px 48 | 49 | @media screen and (max-width: 590px) 50 | .logo 51 | top: 73px 52 | 53 | @media screen and (max-width: 480px) 54 | .logo 55 | top: 16px 56 | left: 0px 57 | 58 | .bottom-margin 59 | margin-bottom: 0.5em 60 | color: #c00 61 | 62 | .title 63 | text-align: center 64 | font-family: "Raleway", "Helvetica Neue", Helvetica, Arial, sans-serif 65 | font-size: 2em 66 | line-height: 2em 67 | 68 | .header 69 | background-color: #9cf 70 | 71 | .categories .category 72 | margin: 5px 73 | padding: 15px 74 | border: solid 1px silver 75 | word-wrap: break-word 76 | display: inline-block 77 | width: 92% 78 | background-color: #ffc 79 | a 80 | color: black 81 | text-decoration: none 82 | &:visited 83 | color: black 84 | 85 | .categories__column 86 | display: inline-block 87 | width: 100% 88 | vertical-align: top 89 | -webkit-column-count: 2 90 | -moz-column-count: 2 91 | column-count: 2 92 | column-gap: .2em 93 | -webkit-column-gap: .2em 94 | -moz-column-gap: .2em 95 | 96 | .category__name 97 | font-size: 18px 98 | font-weight: bold 99 | margin-bottom: 5px 100 | color: #c00 101 | 102 | .category__fields 103 | list-style: none 104 | padding: 0 105 | 106 | .category__field-name 107 | font-size: 15px 108 | font-weight: bold 109 | margin-bottom: 2px 110 | color: #c00 111 | width: 80% 112 | 113 | .category__field-type 114 | font-size: 15px 115 | font-weight: bold 116 | color: #c00 117 | width: 10% 118 | float: right 119 | -------------------------------------------------------------------------------- /spec/lib/data_magic/index/repository_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe DataMagic::Index::Repository do 5 | let(:repository) { DataMagic::Index::Repository.new(super_client, document) } 6 | 7 | let(:super_client) { double('super client', index_name: 
'index') } 8 | let(:document) { double('document', {id: 'id', data: 'data'}) } 9 | 10 | context 'when super client is creating' do 11 | before do 12 | allow(super_client).to receive(:creating?).and_return(true) 13 | allow(super_client).to receive(:index) 14 | end 15 | 16 | it '#save creates an index' do 17 | expect(super_client).to receive(:index).with({ 18 | index: 'index', 19 | id: 'id', 20 | type: 'document', 21 | body: 'data' 22 | }) 23 | repository.save 24 | end 25 | 26 | it '#save will not be skipped when successful' do 27 | repository.save 28 | expect(repository.skipped?).to be_falsey 29 | end 30 | end 31 | 32 | context 'when super client is not creating' do 33 | before do 34 | allow(super_client).to receive(:creating?).and_return(false) 35 | allow(super_client).to receive(:allow_skips?) 36 | allow(super_client).to receive(:update) 37 | end 38 | 39 | it '#save updates an index' do 40 | expect(super_client).to receive(:update).with({ 41 | index: 'index', 42 | id: 'id', 43 | type: 'document', 44 | body: {doc: 'data'} 45 | }) 46 | repository.save 47 | end 48 | 49 | it '#save will not be skipped when successful' do 50 | repository.save 51 | expect(repository.skipped?).to be_falsey 52 | end 53 | end 54 | 55 | context 'when super client is not creating, not skipping and an error is raised' do 56 | before do 57 | allow(super_client).to receive(:creating?).and_return(false) 58 | allow(super_client).to receive(:allow_skips?).and_return(false) 59 | end 60 | 61 | it '#save raises an error' do 62 | allow(super_client).to receive(:update).and_raise(Elasticsearch::Transport::Transport::Errors::NotFound) 63 | expect { 64 | repository.save 65 | }.to raise_error(Elasticsearch::Transport::Transport::Errors::NotFound) 66 | end 67 | end 68 | 69 | context 'when super client is not creating, skipping and an error is raised' do 70 | before do 71 | allow(super_client).to receive(:creating?).and_return(false) 72 | allow(super_client).to receive(:allow_skips?).and_return(true) 73 | 
end 74 | 75 | it '#save marks the repository as skipped' do 76 | allow(super_client).to receive(:update).and_raise(Elasticsearch::Transport::Transport::Errors::NotFound) 77 | expect { 78 | repository.save 79 | }.not_to raise_error 80 | expect(repository.skipped?).to eq(true) 81 | end 82 | end 83 | end 84 | -------------------------------------------------------------------------------- /lib/data_magic/index.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | require_relative 'config' 4 | require_relative 'index/builder_data' 5 | require_relative 'index/event_logger' 6 | require_relative 'index/document' 7 | require_relative 'index/document_builder' 8 | require_relative 'index/importer' 9 | require_relative 'index/output' 10 | require_relative 'index/repository' 11 | require_relative 'index/row_importer' 12 | require_relative 'index/super_client' 13 | 14 | require 'action_view' # for distance_of_time_in_words (logging time) 15 | include ActionView::Helpers::DateHelper # for distance_of_time_in_words (logging time) 16 | 17 | module DataMagic 18 | # data could be a String or an io stream 19 | def self.import_csv(data, options={}) 20 | Index::Importer.process(data, options) 21 | end 22 | 23 | # pre-condition: index is already created w/ config 24 | def self.index_with_dictionary(options = {}) 25 | start_time = Time.now 26 | Config.logger.debug "--- index_with_dictionary, starting at #{start_time}" 27 | 28 | logger.info "files: #{self.config.files}" 29 | config.files.each_with_index do |filepath, index| 30 | fname = filepath.split('/').last 31 | logger.debug "indexing #{fname} #{index} file config:#{config.additional_data_for_file(index).inspect}" 32 | options[:add_data] = config.additional_data_for_file(index) 33 | options[:only] = config.info_for_file(index, :only) 34 | options[:nest] = config.info_for_file(index, :nest) 35 | begin 36 | logger.debug "*"*40 37 | logger.debug "* #{filepath}" 38 | 
logger.debug "*"*40 39 | file_start = Time.now 40 | data = config.read_path(filepath) 41 | rows, _ = DataMagic.import_csv(data, options) 42 | file_end = Time.now 43 | logger.debug "imported #{rows} rows in #{distance_of_time_in_words(file_end, file_start)}, ms: #{file_end - file_start}" 44 | rescue DataMagic::InvalidData => e 45 | Config.logger.debug "Error: skipping #{filepath}, #{e.message}" 46 | end 47 | end 48 | end_time = Time.now 49 | logger.debug "indexing complete: #{distance_of_time_in_words(end_time, start_time)}" 50 | logger.debug "duration: #{end_time - start_time}" 51 | end 52 | 53 | def self.import_with_dictionary(options = {}) 54 | #logger.debug("field_mapping: #{field_mapping.inspect}") 55 | options[:mapping] = config.field_mapping 56 | options = options.merge(config.options) 57 | 58 | es_index_name = self.config.load_datayaml(options[:data_path]) 59 | unless config.index_exists?(es_index_name) 60 | logger.info "creating #{es_index_name}" # TO DO: fix #14 61 | create_index es_index_name, config.field_types 62 | end 63 | 64 | index_with_dictionary(options) 65 | 66 | end # import_with_dictionary 67 | 68 | private 69 | def self.valid_types 70 | %w[integer float string literal name autocomplete boolean] 71 | end 72 | 73 | end # module DataMagic 74 | -------------------------------------------------------------------------------- /spec/lib/data_magic/import_without_data_yaml_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | 4 | describe "DataMagic #import_without_data_yaml" do 5 | describe "without ALLOW_MISSING_YML" do 6 | it "not found locally raises error" do 7 | ENV['DATA_PATH'] = './spec/fixtures/cities_without_yml' 8 | expect { 9 | DataMagic.init(load_now: true) 10 | }.to raise_error(IOError, "No data.y?ml found at ./spec/fixtures/cities_without_yml. 
Did you mean to define ALLOW_MISSING_YML environment variable?") 11 | end 12 | it "not found on s3 raises error" do 13 | ENV['DATA_PATH'] = 's3://mybucket' 14 | fake_s3 = Aws::S3::Client.new(stub_responses: true) 15 | fake_s3.stub_responses(:get_object, Aws::S3::Errors::NoSuchKey.new(Seahorse::Client::RequestContext, 'Fake Error')) 16 | expect { 17 | config = DataMagic::Config.new(s3: fake_s3) 18 | }.to raise_error(IOError, "No data.y?ml found at s3://mybucket. Did you mean to define ALLOW_MISSING_YML environment variable?") 19 | end 20 | 21 | end 22 | describe "with ALLOW_MISSING_YML" do 23 | let (:expected) do 24 | { 25 | "metadata" => { 26 | "total" => 1, 27 | "page" => 0, 28 | "per_page" => DataMagic::DEFAULT_PAGE_SIZE 29 | }, 30 | "results" => [] 31 | } 32 | end 33 | 34 | before(:all) do 35 | DataMagic.destroy 36 | ENV['ALLOW_MISSING_YML'] = 'allow' 37 | ENV['DATA_PATH'] = './spec/fixtures/cities_without_yml' 38 | DataMagic.init(load_now: true) 39 | end 40 | after(:all) do 41 | DataMagic.destroy 42 | ENV['ALLOW_MISSING_YML'] = '' 43 | end 44 | 45 | it "can get list of imported csv files" do 46 | file_list = [ 47 | "./spec/fixtures/cities_without_yml/cities50.csv", 48 | "./spec/fixtures/cities_without_yml/cities51-100.csv", 49 | "./spec/fixtures/cities_without_yml/more.csv", 50 | ] 51 | expect(DataMagic.config.files.sort).to eq(file_list) 52 | end 53 | 54 | it "can get index name from api endpoint" do 55 | expect(DataMagic.config.find_index_for('cities-without-yml')).to eq('cities-without-yml') 56 | end 57 | 58 | it "indexes files with yaml mapping" do 59 | result = DataMagic.search({NAME: "Chicago"}, api: 'cities-without-yml') 60 | expected["results"] = [ 61 | { 62 | "USPS"=>"IL", 63 | "GEOID"=>"1714000", 64 | "ANSICODE"=>"00428803", 65 | "NAME"=>"Chicago", 66 | "LSAD"=>"25", 67 | "FUNCSTAT"=>"A", 68 | "POP10"=>"2695598", 69 | "HU10"=>"1194337", 70 | "ALAND"=>"589571105", 71 | "AWATER"=>"16781658", 72 | "ALAND_SQMI"=>"227.635", 73 | "AWATER_SQMI"=>"6.479", 74 
| "INTPTLAT"=>"41.837551", 75 | "INTPTLONG"=>"-87.681844", 76 | } 77 | ] 78 | expect(result).to eq(expected) 79 | end 80 | end 81 | end 82 | -------------------------------------------------------------------------------- /spec/lib/data_magic/config_field_types_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'DataMagic::Config #field_types' do 4 | let(:config) { DataMagic::Config.new(load_datayaml: false) } 5 | 6 | it "returns empty if dictionary is empty" do 7 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 8 | allow(config).to receive(:dictionary).and_return({}) 9 | expect(config.field_types).to eq({}) 10 | end 11 | 12 | context "when no type is given" do 13 | before do 14 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 15 | allow(config).to receive(:dictionary).and_return({ 16 | 'name' => {source:'NAME_COLUMN'} 17 | }) 18 | end 19 | 20 | it "defaults to string" do 21 | expect(config.field_types).to eq({ 22 | 'name' => 'string' 23 | }) 24 | end 25 | end 26 | 27 | it "supports integers" do 28 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 29 | allow(config).to receive(:dictionary).and_return( 30 | IndifferentHash.new count: 31 | {source:'COUNT_COLUMN', type: 'integer'} 32 | ) 33 | expect(config.field_types).to eq({'count' => 'integer'}) 34 | end 35 | 36 | context "with float type" do 37 | it "sets float mapping" do 38 | allow(config).to receive(:file_config).and_return([{'name' => 'one.csv'}]) 39 | allow(config).to receive(:dictionary).and_return( 40 | IndifferentHash.new percent: 41 | {source:'PERCENT_COLUMN', type: 'float'} 42 | ) 43 | expect(config.field_types).to eq({'percent' => 'float'}) 44 | end 45 | 46 | it "can be excluded" do 47 | allow(config).to receive(:dictionary).and_return( 48 | IndifferentHash.new id: {source:'ID', type: 'integer'}, 49 | percent: {source:'PERCENT', type: 
'float'} 50 | ) 51 | allow(config).to receive(:file_config).and_return([ 52 | IndifferentHash.new({ name:'one.csv', only: ['id'] }) 53 | ]) 54 | expect(config.field_types).to eq({'id' => 'integer'}) 55 | end 56 | 57 | it "can be nested" do 58 | allow(config).to receive(:dictionary).and_return( 59 | IndifferentHash.new id: {source:'ID', type: 'integer'}, 60 | percent: {source:'PERCENT', type: 'float'} 61 | ) 62 | allow(config).to receive(:file_config).and_return([ 63 | IndifferentHash.new({name:'one.csv', 64 | only: ['id']}), 65 | IndifferentHash.new({name:'two.csv', 66 | nest: {key: '2012', contents: ['percent']}}) 67 | ]) 68 | expect(config.field_types).to eq({ 69 | 'id' => 'integer', 70 | '2012.percent' => 'float' 71 | }) 72 | end 73 | end 74 | 75 | it "supports special case for location fields as nil" do 76 | # special case for location in create_index 77 | allow(config).to receive(:dictionary).and_return( 78 | IndifferentHash.new 'location.lat': {source:'LAT_COLUMN'}, 79 | 'location.lon': {source:'LON_COLUMN'} 80 | 81 | ) 82 | expect(config.field_types).to eq({}) 83 | end 84 | end 85 | -------------------------------------------------------------------------------- /spec/fixtures/sample-data/data.yaml: -------------------------------------------------------------------------------- 1 | version: cities100-2010 2 | # cities100.txt 3 | # National Places Gazetteer Files, from US Census 2010 4 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 5 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 6 | # head -n 101 results.txt > cities100.txt 7 | # then convertes to csv and removed " city" from after each city name 8 | index: city-data 9 | api: cities 10 | unique: ['name'] 11 | options: 12 | search: dictionary_only 13 | 14 | dictionary: 15 | id: 16 | source: GEOID 17 | description: > 18 | Geographic Identifier - fully concatenated geographic code (State FIPS and 19 | County FIPS). 
The Census Bureau and other state and federal agencies are 20 | responsible for assigning geographic identifiers, or GEOIDs, to geographic 21 | entities to facilitate the organization, presentation, and 22 | exchange of geographic and statistical data. GEOIDs are numeric codes that 23 | uniquely identify all administrative/legal and statistical geographic areas for 24 | which the Census Bureau tabulates data. From Alaska, the largest state, 25 | to the smallest census block in New York City, every geographic area 26 | has a unique GEOID. Data users rely on GEOIDs to join the appropriate 27 | demographic data from censuses and surveys, such as the 28 | American Community Survey (ACS), to various levels of geography for data 29 | analysis, interpretation and mapping. 30 | code: 31 | source: ANSICODE 32 | description: > 33 | American National Standards Institute codes (ANSI codes) 34 | are standardized numeric or alphabetic codes issued by the American 35 | National Standards Institute (ANSI) to ensure uniform identification of 36 | geographic entities through all federal government agencies. 37 | name: 38 | source: NAME 39 | description: The name of the city 40 | type: literal 41 | state: 42 | source: USPS 43 | description: Two letter state abbreviation 44 | population: 45 | source: POP10 46 | description: City population from 2010 Census data 47 | type: integer 48 | location.lat: INTPTLAT 49 | location.lon: INTPTLONG 50 | area.land: 51 | description: Land Area (square miles) 52 | source: ALAND_SQMI 53 | type: float 54 | area.water: 55 | description: Water Area (square miles) 56 | source: AWATER_SQMI 57 | type: float 58 | 59 | categories: 60 | general: 61 | title: General 62 | description: > 63 | general information about the city, including standard 64 | identifiers and actual census summary data about the population of the city. 
65 | fields: [id, code, name, state, population] 66 | geographic: 67 | title: Geographic 68 | description: > 69 | Geographic characteristics of the area. These are created for 70 | statistical purposes only. Depiction and designation for statistical 71 | purposes does not constitute a determination of jurisdictional authority 72 | or rights of ownership or entitlement. 73 | fields: [location, area.land, area.water] 74 | 75 | files: 76 | - name: cities100.csv 77 | -------------------------------------------------------------------------------- /notes.txt: -------------------------------------------------------------------------------- 1 | commit eabfb903751cc5b7bc9ae0affeb15ad020e1d783 2 | Merge: 0b017e5 a5c5a18 3 | Author: Yoz Grahame 4 | Date: Tue Sep 8 17:53:20 2015 -0700 5 | 6 | Merge pull request #198 from 18F/source-false 7 | 8 | Use `_source: false` to limit JSON coming back from ES 9 | 10 | commit a5c5a18381214d3a51ab54152b463727bb3f79bc 11 | Author: Sarah Allen 12 | Date: Tue Sep 8 17:41:21 2015 -0700 13 | 14 | exclude fields starting with _ 15 | 16 | when the whole source is returned 17 | when we’re not specifying fields 18 | we need to explicitly exclude _names 19 | 20 | commit 94bef492dca538f3e27f20824712f47793f8ab8c 21 | Author: Yoz (Jeremy) Grahame 22 | Date: Tue Sep 8 17:05:28 2015 -0700 23 | 24 | Also record "took" MS value from ES result 25 | 26 | commit 2205dce984c52a50382030d37edeefbcd4316873 27 | Merge: df7f98b 0b017e5 28 | Author: Yoz (Jeremy) Grahame 29 | Date: Tue Sep 8 17:02:09 2015 -0700 30 | 31 | Merge branch 'dev' into source-false 32 | 33 | commit df7f98b216163eb1f27a038a535817ac95aea742 34 | Author: Yoz (Jeremy) Grahame 35 | Date: Tue Sep 8 17:01:20 2015 -0700 36 | 37 | Use `_source: false` for proper field exclusion 38 | 39 | also the "oj" gem for faster JSON 40 | 41 | commit 0b017e56e3a6ed7c9549214a52ed6c2b568c4746 42 | Merge: 9761906 e4f6dfd 43 | Author: Sarah Allen 44 | Date: Tue Sep 8 17:00:48 2015 -0700 45 | 46 | Merge pull request 
#197 from 18F/log-query-time 47 | 48 | Log ES query time, and show with "debug" option 49 | 50 | commit e4f6dfd219b514a3d4523d836ad083005848186c 51 | Author: Yoz (Jeremy) Grahame 52 | Date: Tue Sep 8 16:58:55 2015 -0700 53 | 54 | No, THAT's how you test a hash value 55 | 56 | commit 97619061da0badde70890ed1f26912c0355c5245 57 | Merge: 9c1e04e 63ead03 58 | Author: Sarah Allen 59 | Date: Tue Sep 8 16:25:19 2015 -0700 60 | 61 | Merge pull request #196 from 18F/max-page-size 62 | 63 | Only allow up to MAX_PAGE_SIZE per page 64 | 65 | commit 63ead033b40c619ac143065d36be3a4427458d68 66 | Author: Yoz (Jeremy) Grahame 67 | Date: Tue Sep 8 16:01:54 2015 -0700 68 | 69 | String-to-int bugfix 70 | 71 | commit f85c981da6622d8d8925037325822ada4c4bfce1 72 | Author: Yoz (Jeremy) Grahame 73 | Date: Tue Sep 8 15:32:57 2015 -0700 74 | 75 | Added MAX_PAGE_SIZE test 76 | 77 | commit 4ba1cedcf73a8ff7e6415ab54dcbdf80ff6d211c 78 | Author: Yoz (Jeremy) Grahame 79 | Date: Tue Sep 8 15:25:21 2015 -0700 80 | 81 | Only allow up to MAX_PAGE_SIZE per page 82 | 83 | commit 226fb43f73268c91e4044118f84ceca707cceadb 84 | Author: Yoz (Jeremy) Grahame 85 | Date: Tue Sep 8 15:04:03 2015 -0700 86 | 87 | Log ES query time, and show with "debug" option 88 | 89 | commit 9c1e04ecf2d25c505941c55d71868c2894102983 90 | Merge: 6461581 99a490b 91 | Author: Yoz Grahame 92 | Date: Tue Sep 8 09:13:40 2015 -0700 93 | 94 | Merge pull request #194 from 18F/dev-sort 95 | 96 | autocomplete type alpha sort 97 | 98 | commit 99a490b1b12046895b49f9b3001c05f30b2dfa82 99 | Author: Sarah Allen 100 | Date: Tue Sep 8 01:51:27 2015 -0700 101 | 102 | autocomplete type alpha sort 103 | -------------------------------------------------------------------------------- /lib/data_magic/index/importer.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | module DataMagic 4 | module Index 5 | class Importer 6 | attr_reader :raw_data, :options 7 | 8 | def initialize(raw_data, 
options) 9 | @raw_data = raw_data 10 | @options = options 11 | end 12 | 13 | def process 14 | setup 15 | parse_and_log 16 | finish! 17 | 18 | [row_count, headers] 19 | end 20 | 21 | def client 22 | @client ||= SuperClient.new(es_client, options) 23 | end 24 | 25 | def builder_data 26 | @builder_data ||= BuilderData.new(raw_data, options) 27 | end 28 | 29 | def output 30 | @output ||= Output.new 31 | end 32 | 33 | def parse_and_log 34 | parse_csv 35 | rescue InvalidData => e 36 | trigger("error", e.message) 37 | raise InvalidData, "invalid file format" if empty? 38 | end 39 | 40 | def chunk_size 41 | (ENV['CHUNK_SIZE'] || 100).to_i 42 | end 43 | 44 | def nprocs 45 | (ENV['NPROCS'] || 1).to_i 46 | end 47 | 48 | def parse_csv 49 | if nprocs == 1 50 | parse_csv_whole 51 | else 52 | parse_csv_chunked 53 | end 54 | data.close 55 | end 56 | 57 | def parse_csv_whole 58 | CSV.new( 59 | data, 60 | headers: true, 61 | header_converters: lambda { |str| str.strip.to_sym } 62 | ).each do |row| 63 | RowImporter.process(row, self) 64 | break if at_limit? 65 | end 66 | end 67 | 68 | def parse_csv_chunked 69 | CSV.new( 70 | data, 71 | headers: true, 72 | header_converters: lambda { |str| str.strip.to_sym } 73 | ).each.each_slice(chunk_size) do |chunk| 74 | break if at_limit? 75 | chunks_per_proc = (chunk.size / nprocs.to_f).ceil 76 | Parallel.each(chunk.each_slice(chunks_per_proc)) do |rows| 77 | rows.each_with_index do |row, idx| 78 | RowImporter.process(row, self) 79 | end 80 | end 81 | if !headers 82 | single_document = DocumentBuilder.create(chunk.first, builder_data, DataMagic.config) 83 | set_headers(single_document) 84 | end 85 | increment(chunk.size) 86 | end 87 | end 88 | 89 | def setup 90 | client.create_index 91 | log_setup 92 | end 93 | 94 | def finish! 95 | validate! 
96 | refresh_index 97 | log_finish 98 | end 99 | 100 | def log_setup 101 | opts = options.reject { |k,v| k == :mapping } 102 | trigger("info", "options", opts) 103 | trigger("info", "new_field_names", new_field_names) 104 | trigger("info", "additional_data", additional_data) 105 | end 106 | 107 | def log_finish 108 | trigger("info", "skipped (missing parent id)", output.skipped) if !output.skipped.empty? 109 | trigger('info', "done #{row_count} rows") 110 | end 111 | 112 | def event_logger 113 | @event_logger ||= EventLogger.new 114 | end 115 | 116 | def at_limit? 117 | options[:limit_rows] && row_count == options[:limit_rows] 118 | end 119 | 120 | extend Forwardable 121 | 122 | def_delegators :output, :set_headers, :skipping, :skipped, :increment, :row_count, :log_limit, 123 | :empty?, :validate!, :headers 124 | def_delegators :builder_data, :data, :new_field_names, :additional_data 125 | def_delegators :client, :refresh_index 126 | def_delegators :event_logger, :trigger 127 | 128 | def self.process(*args) 129 | new(*args).process 130 | end 131 | 132 | private 133 | 134 | def es_client 135 | DataMagic.client 136 | end 137 | end 138 | end 139 | end 140 | -------------------------------------------------------------------------------- /sample-data/data.yaml: -------------------------------------------------------------------------------- 1 | version: cities100-2010 2 | # cities100.txt 3 | # National Places Gazetteer Files, from US Census 2010 4 | # https://www.census.gov/geo/maps-data/data/gazetteer2010.html 5 | # (head -n 1 source.txt && tail -n +2 source.txt | LC_ALL=C sort -k7rn,7 -t$'\t' source.txt) > result.txt 6 | # head -n 101 results.txt > cities100.txt 7 | # then convertes to csv and removed " city" from after each city name 8 | index: city-data 9 | api: cities 10 | unique: ['name'] 11 | 12 | options: 13 | search: dictionary_only # API provides error when requesting fields not in dictionary 14 | 15 | dictionary: 16 | id: 17 | source: GEOID 18 | description: 
> 19 | Geographic Identifier - fully concatenated geographic code (State FIPS and 20 | County FIPS). The Census Bureau and other state and federal agencies are 21 | responsible for assigning geographic identifiers, or GEOIDs, to geographic 22 | entities to facilitate the organization, presentation, and 23 | exchange of geographic and statistical data. GEOIDs are numeric codes that 24 | uniquely identify all administrative/legal and statistical geographic areas for 25 | which the Census Bureau tabulates data. From Alaska, the largest state, 26 | to the smallest census block in New York City, every geographic area 27 | has a unique GEOID. Data users rely on GEOIDs to join the appropriate 28 | demographic data from censuses and surveys, such as the 29 | American Community Survey (ACS), to various levels of geography for data 30 | analysis, interpretation and mapping. 31 | code: 32 | source: ANSICODE 33 | description: > 34 | American National Standards Institute codes (ANSI codes) 35 | are standardized numeric or alphabetic codes issued by the American 36 | National Standards Institute (ANSI) to ensure uniform identification of 37 | geographic entities through all federal government agencies. 38 | name: 39 | source: NAME 40 | description: The name of the city 41 | type: literal 42 | state: 43 | source: USPS 44 | description: Two letter state abbreviation 45 | population: 46 | source: POP10 47 | description: City population from 2010 Census data 48 | type: integer 49 | location.lat: INTPTLAT 50 | location.lon: INTPTLONG 51 | land_area: 52 | source: ALAND_SQMI 53 | description: Land Area (square miles) 55 | type: float 56 | area.water: 57 | description: Water Area (square miles) 58 | source: AWATER_SQMI 59 | type: float 60 | 61 | categories: 62 | general: 63 | title: General 64 | description: > 65 | general information about the city, including standard 66 | identifiers and actual census summary data about the population of the city. 
67 | fields: [id, code, name, state, population] 68 | geographic: 69 | title: Geographic 70 | description: > 71 | Geographic characteristics of the area. These are created for 72 | statistical purposes only. Depiction and designation for statistical 73 | purposes does not constitute a determination of jurisdictional authority 74 | or rights of ownership or entitlement. 75 | fields: [location, area.land, area.water] 76 | general2: 77 | title: General2 78 | description: > 79 | general information about the city, including standard 80 | identifiers and actual census summary data about the population of the city. 81 | fields: [id, code, name, state, population] 82 | general3: 83 | title: General3 84 | description: > 85 | short 86 | fields: [id, code, name, state, population] 87 | general4: 88 | title: General4 89 | description: > 90 | short 91 | fields: [id, code, name, state, population] 92 | general5: 93 | title: General5 94 | description: > 95 | general information about the city, including standard 96 | identifiers and actual census summary data about the population of the city. 
97 | fields: [id, code, name, state, population] 98 | 99 | files: 100 | - name: cities100.csv 101 | -------------------------------------------------------------------------------- /public/javascripts/jquery-ujs.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Padrino Javascript Jquery Adapter 3 | * Created for use with Padrino Ruby Web Framework (http://www.padrinorb.com) 4 | **/ 5 | 6 | /* Remote Form Support 7 | * form_for @user, '/user', :remote => true 8 | **/ 9 | 10 | $(function(){ 11 | $('form').on('submit', function(e) { 12 | var element = $(this), message = element.data('confirm'); 13 | if (message && !confirm(message)) { return false; } 14 | if (element.data('remote') == true) { 15 | e.preventDefault(); e.stopped = true; 16 | JSAdapter.sendRequest(element, { 17 | verb: element.data('method') || element.attr('method') || 'post', 18 | url: element.attr('action'), 19 | dataType: element.data('type') || ($.ajaxSettings && $.ajaxSettings.dataType) || 'script', 20 | params: element.serializeArray() 21 | }); 22 | } 23 | }); 24 | 25 | /* Confirmation Support 26 | * link_to 'sign out', '/logout', :confirm => 'Log out?' 
27 | **/ 28 | 29 | $(document).on('click', 'a[data-confirm]', function(e) { 30 | var message = $(this).data('confirm'); 31 | if (!confirm(message)) { e.preventDefault(); e.stopped = true; } 32 | }); 33 | 34 | /* 35 | * Link Remote Support 36 | * link_to 'add item', '/create', :remote => true 37 | **/ 38 | 39 | $(document).on('click', 'a[data-remote=true]', function(e) { 40 | var element = $(this); 41 | if (e.stopped) return; 42 | e.preventDefault(); e.stopped = true; 43 | JSAdapter.sendRequest(element, { 44 | verb: element.data('method') || 'get', 45 | url: element.attr('href') 46 | }); 47 | }); 48 | 49 | /* 50 | * Link Method Support 51 | * link_to 'delete item', '/destroy', :method => :delete 52 | **/ 53 | 54 | $(document).on('click', 'a[data-method]:not([data-remote])', function(e) { 55 | if (e.stopped) return; 56 | JSAdapter.sendMethod($(this)); 57 | e.preventDefault(); e.stopped = true; 58 | }); 59 | 60 | /* JSAdapter */ 61 | var JSAdapter = { 62 | // Sends an xhr request to the specified url with given verb and params 63 | // JSAdapter.sendRequest(element, { verb: 'put', url : '...', params: {} }); 64 | sendRequest: function(element, options) { 65 | var verb = options.verb, url = options.url, params = options.params, dataType = options.dataType; 66 | var event = element.trigger('ajax:before'); 67 | if (event.stopped) return false; 68 | $.ajax({ 69 | url: url, 70 | type: verb.toUpperCase() || 'POST', 71 | data: params || [], 72 | dataType: dataType, 73 | 74 | beforeSend: function(request) { element.trigger('ajax:loading', [ request ]); }, 75 | complete: function(request) { element.trigger('ajax:complete', [ request ]); }, 76 | success: function(request) { element.trigger('ajax:success', [ request ]); }, 77 | error: function(request) { element.trigger('ajax:failure', [ request ]); } 78 | }); 79 | element.trigger('ajax:after'); 80 | }, 81 | // Triggers a particular method verb to be triggered in a form posting to the url 82 | // JSAdapter.sendMethod(element); 83 
| sendMethod: function(element) { 84 | var verb = element.data('method'); 85 | var url = element.attr('href'); 86 | var form = $('
'); 87 | var csrf_token = $('meta[name=csrf-token]').attr('content'); 88 | var csrf_param = $('meta[name=csrf-param]').attr('content'); 89 | form.hide().appendTo('body'); 90 | if (verb !== 'post') { 91 | var field = ''; 92 | form.append(field); 93 | } 94 | if (csrf_param !== undefined && csrf_token !== undefined) { 95 | var field = ''; 96 | form.append(field); 97 | } 98 | form.submit(); 99 | } 100 | }; 101 | 102 | // Every xhr request is sent along with the CSRF token. 103 | $.ajaxPrefilter(function(options, originalOptions, xhr) { 104 | if (options.verb !== 'GET') { 105 | var token = $('meta[name="csrf-token"]').attr('content'); 106 | if (token) xhr.setRequestHeader('X-CSRF-Token', token); 107 | } 108 | }); 109 | }); 110 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Running Open Data Maker on your computer 2 | 3 | If you just want to install and run, then you can just download a 4 | [zip file](https://github.com/18F/open-data-maker/archive/master.zip). 5 | 6 | You will still need the the dependencies below, but you don't need to 7 | clone the git repo for the source code. 8 | 9 | ## Install Prerequisites 10 | 11 | You can run our bootstrap script to make sure you have all the dependencies. 12 | It will also install and start up Elasticsearch: 13 | 14 | ``` 15 | script/bootstrap 16 | ``` 17 | 18 | To run Open Data Maker, you will need to have the following software installed on your computer: 19 | * [Elasticsearch] 1.7.3 20 | * [Ruby] 2.2.2 21 | 22 | **NOTE: Open Data Maker does not currently work with Elasticsearch versions 2.x and above.** 23 | You can follow or assist our progress towards 2.x compatibility [at this GitHub issue](https://github.com/18F/open-data-maker/issues/248). 24 | 25 | ### Mac OS X 26 | 27 | On a Mac, we recommend installing Ruby 2.2.2 via [RVM], and Elasticsearch 1.7.3 via 28 | [Homebrew]. 
If you don't want to use the bootstrap script above, you can install 29 | elasticsearch 1.7 with brew using the following command: 30 | 31 | ``` 32 | brew install elasticsearch17 33 | ``` 34 | 35 | If you are contributing to development, you will also need [Git]. 36 | If you don't already have these tools, the 18F [laptop] script will install 37 | them for you. 38 | 39 | ## Get the Source Code 40 | 41 | For development, [fork](http://help.github.com/fork-a-repo/) the repo 42 | first, then clone your fork. 43 | 44 | ``` 45 | git clone https://github.com//open-data-maker.git 46 | cd open-data-maker 47 | ``` 48 | 49 | ## Run the App 50 | 51 | ### Make sure Elasticsearch is up and running 52 | If you just ran `script/bootstrap`, then Elasticsearch should already be 53 | running. But if you stopped it or restarted your computer, you'll need to 54 | start it back up. Assuming you installed Elasticsearch via our `bootstrap` 55 | script, you can restart it with this command: 56 | 57 | ```brew services restart elasticsearch``` 58 | 59 | 60 | ### Import the data 61 | 62 | To get started, you can import sample data with: 63 | 64 | `rake import` 65 | 66 | ### Start the app 67 | 68 | ``` 69 | padrino start 70 | ``` 71 | Go to: http://127.0.0.1:3000/ 72 | 73 | and you should see the text `Welcome to Open Data Maker` with a link to 74 | the API created by the [sample data](sample-data). 75 | 76 | You can verify that the import was successful by visiting 77 | http://127.0.0.1:3000/v1/cities?name=Cleveland. You should see something like: 78 | 79 | ```json 80 | { 81 | "state": "OH", 82 | "name": "Cleveland", 83 | "population": 396815, 84 | "land_area": 77.697, 85 | "location": { 86 | "lat": 41.478138, 87 | "lon": -81.679486 88 | } 89 | ``` 90 | 91 | ### Custom Datasets 92 | 93 | While the app is running (or anytime) you can run `rake import`. 
For instance, if you had a `presidents/data.yaml` file, you would import 94 | it with: 95 | 96 | ```sh 97 | export DATA_PATH=presidents 98 | rake import 99 | # or, more succinctly: 100 | DATA_PATH=presidents rake import 101 | ``` 102 | 103 | to clear the data, assuming the data set had an index named "president-data" 104 | 105 | ``` 106 | rake es:delete[president-data] 107 | ``` 108 | 109 | you may alternately delete all the indices (which could affect other apps if 110 | they are using your local Elasticsearch) 111 | 112 | ``` 113 | rake es:delete[_all] 114 | ``` 115 | 116 | The data directory can optionally include a file called `data.yaml` (see [the sample one](sample-data/data.yaml) for its schema) that references one or more `.csv` files and specifies data types, 117 | field name mapping, and other support data. 118 | 119 | ## Experimental web UI for indexing 120 | 121 | Optionally, you can enable indexing from the web app, but this option is still experimental: 122 | * `export INDEX_APP=enable` 123 | * in your browser, go to /index/reindex 124 | 125 | the old index (if present) will be deleted and re-created from source files at DATA_PATH. 126 | 127 | ## Want to help? 
128 | 129 | See [Contribution Guide](CONTRIBUTING.md) 130 | 131 | Read additional [implementation notes](NOTES.md) 132 | 133 | [Elasticsearch]: https://www.elastic.co/products/elasticsearch 134 | [Homebrew]: http://brew.sh/ 135 | [RVM]: https://github.com/wayneeseguin/rvm 136 | [rbenv]: https://github.com/sstephenson/rbenv 137 | [Ruby]: https://www.ruby-lang.org/en/ 138 | [Git]: https://git-scm.com/ 139 | [laptop]: https://github.com/18F/laptop 140 | -------------------------------------------------------------------------------- /app/controllers.rb: -------------------------------------------------------------------------------- 1 | # Main front page 2 | OpenDataMaker::App.controllers do 3 | get :index do 4 | render :home, layout: true, locals: { 5 | 'title' => 'Open Data Maker', 6 | 'endpoints' => DataMagic.config.api_endpoint_names, 7 | 'examples' => DataMagic.config.examples, 8 | 'categories' => DataMagic.config.categories.to_json 9 | } 10 | end 11 | 12 | get :category, :with => :id do 13 | category_entry = DataMagic.config.category_by_id(params[:id]) 14 | render :category, layout: true, locals: { 15 | 'title' => 'Open Data Maker', 16 | 'category_entry' => category_entry.to_json, 17 | 'field_details' => category_entry['field_details'].to_json 18 | } 19 | end 20 | end 21 | 22 | CACHE_TTL = 300 23 | 24 | # All API requests are prefixed by the API version 25 | # in this case, "v1" - e.g. "/v1/endpoints" etc. 
26 | OpenDataMaker::App.controllers :v1 do 27 | before do 28 | content_type :json 29 | headers 'Access-Control-Allow-Origin' => '*', 30 | 'Access-Control-Allow-Methods' => ['GET'], 31 | 'Surrogate-Control' => "max-age=#{CACHE_TTL}" 32 | cache_control :public, max_age: CACHE_TTL 33 | end 34 | 35 | get :endpoints do 36 | endpoints = DataMagic.config.api_endpoints.keys.map do |key| 37 | { 38 | name: key, 39 | url: url_for(:v1, :index, endpoint: key) 40 | } 41 | end 42 | return { endpoints: endpoints }.to_json 43 | end 44 | 45 | get '/data.json' do 46 | data = DataMagic.config.data 47 | data.to_json 48 | end 49 | 50 | get :index, with: ':endpoint/:command', provides: [:json] do 51 | process_params 52 | end 53 | 54 | get :index, with: ':endpoint', provides: [:json, :csv] do 55 | process_params 56 | end 57 | end 58 | 59 | def process_params 60 | options = get_search_args_from_params(params) 61 | DataMagic.logger.debug "-----> APP GET #{params.inspect} with options #{options.inspect}" 62 | 63 | check_endpoint!(options) 64 | set_content_type(options) 65 | search_and_respond(options) 66 | end 67 | 68 | def search_and_respond(options) 69 | data = DataMagic.search(params, options) 70 | halt 400, data.to_json if data.key?(:errors) 71 | 72 | if content_type == :csv 73 | output_data_as_csv(data['results']) 74 | else 75 | data.to_json 76 | end 77 | end 78 | 79 | def check_endpoint!(options) 80 | unless DataMagic.config.api_endpoints.keys.include? options[:endpoint] 81 | halt 404, { 82 | error: 404, 83 | message: "#{options[:endpoint]} not found. Available endpoints: #{DataMagic.config.api_endpoints.keys.join(',')}" 84 | }.to_json 85 | end 86 | end 87 | 88 | def set_content_type(options) 89 | if options[:command] == 'stats' 90 | content_type :json 91 | else 92 | content_type(options[:format].nil? ? 
:json : options[:format].to_sym) 93 | end 94 | end 95 | 96 | # TODO: Use of non-underscore-prefixed option parameters is still 97 | # supported but deprecated, and should be removed at some point soon - 98 | # see comment in method body 99 | def get_search_args_from_params(params) 100 | options = {} 101 | %w(metrics sort fields zip distance page per_page debug).each do |opt| 102 | options[opt.to_sym] = params.delete("_#{opt}") 103 | # TODO: remove next line to end support for un-prefixed option parameters 104 | options[opt.to_sym] ||= params.delete(opt) 105 | end 106 | options[:endpoint] = params.delete("endpoint") # these two params are 107 | options[:format] = params.delete("format") # supplied by Padrino 108 | options[:fields] = (options[:fields] || "").split(',') 109 | options[:command] = params.delete("command") 110 | 111 | options[:metrics] = options[:metrics].split(/\s*,\s*/) if options[:metrics] 112 | options 113 | end 114 | 115 | def output_data_as_csv(results) 116 | # We assume all rows have the same keys 117 | if results.empty? 118 | '' 119 | else 120 | CSV.generate(force_quotes: true, headers: true) do |csv| 121 | results.each_with_index do |row, row_num| 122 | row = NestedHash.new(row).withdotkeys 123 | # make the order match data.yaml order 124 | output = DataMagic.config.field_types.each_with_object({}) do |(name, type), output| 125 | output[name] = row[name] unless row[name].nil? 126 | if name == "location" 127 | output["location.lat"] = row["location.lat"] unless row["location.lat"].nil? 128 | output["location.lon"] = row["location.lon"] unless row["location.lon"].nil? 
129 | end 130 | end 131 | csv << output.keys if row_num == 0 132 | csv << output 133 | end 134 | end 135 | end 136 | end 137 | -------------------------------------------------------------------------------- /spec/lib/data_magic/search_name_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'data_magic' 3 | require 'csv' 4 | 5 | describe "DataMagic intuitive search" do 6 | 7 | before :example do 8 | DataMagic.destroy 9 | ENV['DATA_PATH'] = './spec/fixtures/school_names' 10 | DataMagic.init(load_now: true) 11 | end 12 | after :example do 13 | DataMagic.destroy 14 | end 15 | 16 | RSpec.configure do |c| 17 | c.alias_it_should_behave_like_to :it_correctly, 'correctly:' 18 | end 19 | 20 | let(:expected_meta) {{"metadata"=>{"total"=>1, "page"=>0, "per_page"=>20}}} 21 | let(:expected_match) { "" } 22 | let(:response) { DataMagic.search( 23 | {'school.name' => subject}, fields:['school.name']) } 24 | 25 | context "full request" do 26 | let(:response) { DataMagic.search({id: 1}) } 27 | let(:expected_match) { [{"id"=>"1", "school"=>{"state"=>"AL", "name"=>"Stillman College"}}]} 28 | it "provides expected document" do 29 | expect(response['results']).to eql expected_match 30 | end 31 | end 32 | 33 | context "sort" do 34 | shared_examples "returns" do 35 | it "sorted results " do 36 | expect(response['results'].map { |i| i['school.name'] }) 37 | .to eql expected_match 38 | end 39 | end 40 | 41 | context "with list of names" do 42 | let(:response) { DataMagic.search({}, fields:['school.name'], 43 | sort: 'school.name') } 44 | # fields:['name'], 45 | let(:expected_match) { 46 | csv_path = File.expand_path("../../fixtures/school_names/school_names.csv", __dir__) 47 | data = CSV.read(csv_path).slice(1..-1) 48 | data.map { |row| row[2] } 49 | .sort.slice(0,20) 50 | } 51 | it_correctly "returns" 52 | end 53 | 54 | end 55 | 56 | context "basic search" do 57 | shared_examples "finds" do 58 | it "correct 
results " do 59 | expect(response['results'] 60 | .map { |i| i['school.name'] } 61 | .sort ) 62 | .to eql expected_match 63 | end 64 | it "correct metadata" do 65 | expect(response.reject { |k, _| k == 'results' }).to eql expected_meta 66 | end 67 | end 68 | 69 | context "for exact match" do 70 | subject { 'New York University' } 71 | let(:expected_match) { ['New York University'] } 72 | it_correctly "finds" 73 | end 74 | context "for exact match (case insensitive)" do 75 | subject { 'new YORK UniverSity' } 76 | let(:expected_match) { ['New York University'] } 77 | it_correctly "finds" 78 | end 79 | 80 | context "for exact match (case insensitive)" do 81 | subject { 'new YORK UniverSity' } 82 | let(:expected_match) { ['New York University'] } 83 | it_correctly "finds" 84 | end 85 | 86 | context "by prefix" do 87 | subject { 'Still' } 88 | let(:expected_match) { ['Stillman College'] } 89 | it_correctly "finds" 90 | end 91 | 92 | context "by prefix (case insensitive)" do 93 | subject { 'still' } 94 | let(:expected_match) { ['Stillman College'] } 95 | it_correctly "finds" 96 | end 97 | 98 | context "by prefix in the middle of the name" do 99 | subject { 'Phoenix' } 100 | let(:expected_meta) {{"metadata"=>{"total"=>3, "page"=>0, "per_page"=>20}}} 101 | let(:expected_match) { ['Phoenix College', 102 | 'University of Phoenix-Online Campus', 103 | "University of Phoenix-Phoenix Campus"] } 104 | it_correctly "finds" 105 | end 106 | 107 | context "with words in the wrong order" do 108 | subject { 'University New York' } 109 | let(:expected_match) { ['New York University'] } 110 | it_correctly "finds" 111 | end 112 | 113 | context "partial word after dash" do 114 | subject { 'berk' } 115 | let(:expected_meta) {{"metadata"=>{"total"=>3, "page"=>0, "per_page"=>20}}} 116 | let(:expected_match) { ['Berk Trade and Business School', 117 | 'Berklee College of Music', 118 | 'University of California-Berkeley'] } 119 | it_correctly "finds" 120 | end 121 | 122 | context "words 
separated by dash" do 123 | subject { 'phoenix online' } 124 | let(:expected_match) { ['University of Phoenix-Online Campus'] } 125 | it_correctly "finds" 126 | end 127 | end 128 | # TO DO 129 | # "pheonix" (mis-spelling) should probably work 130 | # "phoenix college" should also probably return "university of phoenix" --- since college is a synonym for university 131 | 132 | end 133 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Open Data Maker 2 | [![Build Status](https://circleci.com/gh/18F/open-data-maker/tree/dev.svg?style=svg)](https://circleci.com/gh/18F/open-data-maker/tree/dev) 3 | 4 | The goal of this project is to make it easy to turn a lot of potentially large 5 | csv files into open data via an API and the ability for people to download 6 | smaller csv files with a subset of the data. 7 | 8 | Preliminary research suggests that open data users (journalists and others) 9 | actually know how to work with spreadsheets really well, but a lot of the 10 | data sets that we have in government are huge. 11 | 12 | The first version of this project will allow us to host a website for an 13 | agency with a specific set of csv files, which are deployed with the app. 14 | This will allow us to deploy more quickly since there will be a lower risk 15 | security profile than if an agency could upload the CSV files (which might 16 | be a nice longer term feature). 
17 | 18 | 19 | ## Install and Run the App (as a developer) 20 | 21 | See our [Installation Guide](INSTALL.md) 22 | 23 | ## How this works 24 | 25 | By default, data will be loaded from /sample-data when you run `rake import` 26 | 27 | * [cities100.csv](sample-data/cities100.csv) - dataset of 100 most populous cities in the US 28 | * [data.yaml](sample-data/data.yaml) - configuration for 29 | * index name *city-data* 30 | * api endpoint name *cities* 31 | * how columns are mapped to fields in json output 32 | * data types 33 | * unique columns *name* 34 | 35 | When you run the app, you can query the dataset via json API, like: /cities?name=Chicago 36 | 37 | * http://localhost:3000/cities?name=Chicago 38 | * http://localhost:3000/cities?name=Chicago&state=IL 39 | * http://localhost:3000/cities?state=NY,MA 40 | * http://localhost:3000/cities?state=CA&fields=name,size 41 | 42 | To use your own data, you can set a different directory, for example: 43 | 44 | ``` 45 | export DATA_PATH='./data' 46 | ``` 47 | 48 | 1. Put csv files into /data 49 | 1. Import files from /data: ```rake import``` (or restart the app) 50 | 1. There can be multiple files (must end in .csv) 51 | 1. Optional [data.yaml](sample-data/data.yaml) file that specifies index name, API endpoint, file list, and a dictionary of column -> field name mapping and types 52 | 1. Optionally import all the columns, not just ones specified in dictionary (see example: [import: all](spec/fixtures/import_with_options/data.yaml)) 53 | 1. If data.yaml not provided, all files and fields will be imported with folder or bucket name used as the API endpoint (name is 'slugified' with dashes replacing spaces) 54 | 1. api endpoint to get the data /api=endpoint?field_or_column_name=value 55 | 56 | ## More Configuration Options 57 | 58 | Often while you are developing an API and data dictionary, 59 | it is helpful to include all the columns in the csv. 
If you add the following to 60 | data.yaml, the field names and types from the dictionary will be used and any 61 | unspecified columns will simply use the column name as the field name. 62 | 63 | ``` 64 | options: 65 | columns: all 66 | ``` 67 | 68 | You can use the dictionary to provide nice errors to developers who use the API. 69 | This can be used in conjunction with the above ```columns: all``` which will 70 | make it so that columns that are not referenced in the dictionary are not 71 | searchable, but will make it so that unspecified fields cause errors to be 72 | reported. 73 | 74 | ``` 75 | options: 76 | search: dictionary_only 77 | ``` 78 | 79 | Also for debugging, you can limit the number of files that will be imported. This is helpful when the import process is time consuming because you have many, many files, but can test format changes with a subset of the files. 80 | 81 | ``` 82 | options: 83 | limit: 4 84 | ``` 85 | 86 | 87 | 88 | ## Help Wanted 89 | 90 | 1. Try out importing multiple data sets with different endpoints and data.yaml configuration 91 | 2. Take a look at our [open issues](https://github.com/18F/open-data-maker/issues) and our [Contribution Guide](CONTRIBUTING.md) 92 | 93 | ## More Info 94 | 95 | Here's how it might look in the future: 96 | 97 | ![overview of data types, prompt to download data, create a custom data set, or look at API docs](/doc/data-overview.png) 98 | 99 | 100 | ![Download all the data or make choices to create a csv with a subset](/doc/csv-download.png) 101 | 102 | ### Acknowledgements 103 | Zipcode latitude and longitude provided by [GeoNames](http://www.geonames.org/) under a [Creative Commons Attribution 3.0 License](http://creativecommons.org/licenses/by/3.0/). 104 | 105 | ### Public domain 106 | 107 | Except as noted above, this project is in the worldwide [public domain](LICENSE.md). 
As stated in [CONTRIBUTING](CONTRIBUTING.md): 108 | 109 | > This project is in the public domain within the United States, and copyright and related rights in the work worldwide are waived through the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/). 110 | > 111 | > All contributions to this project will be released under the CC0 dedication. By submitting a pull request, you are agreeing to comply with this waiver of copyright interest. 112 | -------------------------------------------------------------------------------- /lib/data_magic/error_checker.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module ErrorChecker 3 | class << self 4 | def check(params, options, config) 5 | report_required_params_absent(options) + 6 | report_nonexistent_params(params, config) + 7 | report_nonexistent_operators(params) + 8 | report_nonexistent_fields(options[:fields], config) + 9 | report_bad_range_argument(params) + 10 | report_wrong_field_type(params, config) + 11 | report_wrong_zip(options) + 12 | report_distance_requires_zip(options) 13 | end 14 | 15 | private 16 | 17 | def report_required_params_absent(options) 18 | if options[:command] == 'stats' && options[:fields].length == 0 19 | [build_error(error: 'invalid_or_incomplete_parameters', input: options[:command])] 20 | else 21 | [] 22 | end 23 | end 24 | 25 | def report_distance_requires_zip(params) 26 | # if distance, must have zip 27 | return [] if (params[:distance] && params[:zip]) || (!params[:distance]) 28 | [build_error( 29 | error: 'distance_error' 30 | )] 31 | end 32 | 33 | def report_wrong_zip(params) 34 | return [] if !params[:zip] || Zipcode.valid?(params[:zip]) 35 | [build_error( 36 | error: 'zipcode_error', 37 | parameter: :zip, 38 | input: params[:zip].to_s 39 | )] 40 | end 41 | 42 | def report_nonexistent_params(params, config) 43 | return [] unless config.dictionary_only_search? 
44 | params.keys.reject { |p| config.field_type(strip_op(p)) }. 45 | map { |p| build_error(error: 'parameter_not_found', input: strip_op(p)) } 46 | end 47 | 48 | def report_nonexistent_operators(params) 49 | params.keys.select { |p| p =~ /__(\w+)$/ && $1 !~ /range|not|ne/i }. 50 | map do |p| 51 | (param, op) = p.match(/^(.*)__(\w+)$/).captures 52 | build_error(error: 'operator_not_found', parameter: param, input: op) 53 | end 54 | end 55 | 56 | def report_nonexistent_fields(fields, config) 57 | if fields && !fields.empty? && config.dictionary_only_search? 58 | fields.reject { |f| config.field_type(f.to_s) }. 59 | map { |f| build_error(error: 'field_not_found', input: f.to_s) } 60 | else 61 | [] 62 | end 63 | end 64 | 65 | def report_bad_range_argument(params) 66 | ranges = params.select do |p,v| 67 | p =~ /__range$/ and 68 | v !~ / ^(\d+(\.\d+)?)? # optional starting number 69 | \.\. # range dots 70 | (\d+(\.\d+)?)? # optional ending number 71 | (,(\d+(\.\d+)?)?\.\.(\d+(\.\d+)?)?)* # and more, with commas 72 | $/x 73 | end 74 | ranges.map do |p,v| 75 | build_error(error: 'range_format_error', parameter: strip_op(p), input: v) 76 | end 77 | end 78 | 79 | def report_wrong_field_type(params, config) 80 | bad_fields = params.select do |p, v| 81 | next false if p =~ /__range$/ 82 | param_type = config.field_type(strip_op(p)) 83 | value_type = guess_value_type(v) 84 | (param_type == "float" && value_type != "float" && value_type != "integer") or 85 | (param_type == "integer" && value_type != "integer") 86 | end 87 | bad_fields.map do |p, v| 88 | build_error(error: 'parameter_type_error', parameter: p, input: v, 89 | expected_type: config.field_type(strip_op(p)), 90 | input_type: guess_value_type(v)) 91 | end 92 | end 93 | 94 | def build_error(opts) 95 | opts[:message] = 96 | case opts[:error] 97 | when 'invalid_or_incomplete_parameters' 98 | "The command #{opts[:input]} requires a fields parameter." 
99 | when 'parameter_not_found' 100 | "The input parameter '#{opts[:input]}' is not known in this dataset." 101 | when 'field_not_found' 102 | "The input field '#{opts[:input]}' (in the fields parameter) is not a field in this dataset." 103 | when 'operator_not_found' 104 | "The input operator '#{opts[:input]}' (appended to the parameter '#{opts[:parameter]}') is not known or supported. (Known operators: range, ne, not)" 105 | when 'parameter_type_error' 106 | "The parameter '#{opts[:parameter]}' expects a value of type #{opts[:expected_type]}, but received '#{opts[:input]}' which is a value of type #{opts[:input_type]}." 107 | when 'range_format_error' 108 | "The range '#{opts[:input]}' supplied to parameter '#{opts[:parameter]}' isn't in the correct format." 109 | when 'zipcode_error' 110 | "The provided zipcode, '#{opts[:input]}', is not valid." 111 | when 'distance_error' 112 | "Use of the 'distance' parameter also requires a 'zip' parameter." 113 | end 114 | opts 115 | end 116 | 117 | def guess_value_type(value) 118 | case value.to_s 119 | when /^-?\d+$/ 120 | "integer" 121 | when /^(-?\d+,?)+$/ # list of integers 122 | "integer" 123 | when /^-?\d+\.\d+$/ 124 | "float" 125 | else 126 | "string" 127 | end 128 | end 129 | 130 | def strip_op(param) 131 | param.sub(/__\w+$/, '') 132 | end 133 | end 134 | end 135 | end 136 | -------------------------------------------------------------------------------- /lib/data_magic/query_builder.rb: -------------------------------------------------------------------------------- 1 | module DataMagic 2 | module QueryBuilder 3 | class << self 4 | # Creates query from parameters passed into endpoint 5 | def from_params(params, options, config) 6 | per_page = (options[:per_page] || config.page_size || DataMagic::DEFAULT_PAGE_SIZE).to_i 7 | page = options[:page].to_i || 0 8 | per_page = DataMagic::MAX_PAGE_SIZE if per_page > DataMagic::MAX_PAGE_SIZE 9 | query_hash = { 10 | from: page * per_page, 11 | size: per_page, 12 | } 13 | 14 
| query_hash[:query] = generate_squery(params, options, config).to_search 15 | 16 | if options[:command] == 'stats' 17 | query_hash.merge! add_aggregations(params, options, config) 18 | end 19 | 20 | if options[:fields] && !options[:fields].empty? 21 | query_hash[:fields] = get_restrict_fields(options) 22 | query_hash[:_source] = false 23 | else 24 | query_hash[:_source] = { 25 | exclude: ["_*"] 26 | } 27 | end 28 | query_hash[:sort] = get_sort_order(options[:sort], config) if options[:sort] && !options[:sort].empty? 29 | query_hash 30 | end 31 | 32 | private 33 | 34 | def generate_squery(params, options, config) 35 | squery = Stretchy.query(type: 'document') 36 | squery = search_location(squery, options) 37 | search_fields_and_ranges(squery, params, config) 38 | end 39 | 40 | # Wrapper for Stretchy aggregation clause builder (which wraps ElasticSearch (ES) :aggs parameter) 41 | # Extracts all extended_stats aggregations from ES, to be filtered later 42 | # Is a no-op if no fields are specified, or none of them are numeric 43 | def add_aggregations(params, options, config) 44 | agg_hash = options[:fields].inject({}) do |memo, f| 45 | if config.column_field_types[f.to_s] && ["integer", "float"].include?(config.column_field_types[f.to_s]) 46 | memo[f.to_s] = { extended_stats: { "field" => f.to_s } } 47 | end 48 | memo 49 | end 50 | 51 | agg_hash.empty? ? {} : { aggs: agg_hash } 52 | end 53 | 54 | def get_restrict_fields(options) 55 | options[:fields].map(&:to_s) 56 | end 57 | 58 | # @description turns a string like "state,population:desc" into [{'state' => {order: 'asc'}},{ "population" => {order: "desc"} }] 59 | # @param [String] sort_param 60 | # @return [Array] 61 | def get_sort_order(sort_param, config) 62 | sort_param.to_s.scan(/(\w+[\.\w]*):?(\w*)/).map do |field_name, direction| 63 | direction = 'asc' if direction.empty? 
64 | type = config.field_type(field_name) 65 | # for 'autocomplete' search on lowercase not analyzed indexed in _name 66 | field_name = "_#{field_name}" if type == 'autocomplete' 67 | { field_name => { order: direction } } 68 | end 69 | end 70 | 71 | def to_number(value) 72 | value =~ /\./ ? value.to_f : value.to_i 73 | end 74 | 75 | def search_fields_and_ranges(squery, params, config) 76 | params.each do |param, value| 77 | field_type = config.field_type(param) 78 | if field_type == "name" 79 | squery = include_name_query(squery, param, value) 80 | elsif field_type == "autocomplete" 81 | squery = autocomplete_query(squery, param, value) 82 | elsif match = /(.+)__(range|ne|not)\z/.match(param) 83 | field, operator = match.captures.map(&:to_sym) 84 | squery = range_query(squery, operator, field, value) 85 | elsif field_type == "integer" && value.is_a?(String) && /,/.match(value) # list of integers 86 | squery = integer_list_query(squery, param, value) 87 | else # field equality 88 | squery = squery.where(param => value) 89 | end 90 | end 91 | squery 92 | end 93 | 94 | def include_name_query(squery, field, value) 95 | value = value.split(' ').map { |word| "#{word}*"}.join(' ') 96 | squery.match.query( 97 | # we store lowercase name in field with prefix _ 98 | "wildcard": { "_#{field}" => { "value": value.downcase } } 99 | ) 100 | end 101 | 102 | def range_query(squery, operator, field, value) 103 | if operator == :ne or operator == :not # field negation 104 | squery.where.not(field => value) 105 | else # field range 106 | squery.filter( 107 | or: build_ranges(field, value.split(',')) 108 | ) 109 | end 110 | end 111 | 112 | def autocomplete_query(squery, field, value) 113 | squery.match.query( 114 | common: { 115 | field => { 116 | query: value, 117 | cutoff_frequency: 0.001, 118 | low_freq_operator: "and" 119 | } 120 | }) 121 | end 122 | 123 | def integer_list_query(squery, field, value) 124 | squery.filter( 125 | terms: { 126 | field => value.split(',').map(&:to_i) 
} 127 | ) 128 | end 129 | 130 | def build_ranges(field, range_strings) 131 | range_strings.map do |range| 132 | min, max = range.split('..') 133 | values = {} 134 | values[:gte] = to_number(min) unless min.empty? 135 | values[:lte] = to_number(max) if max 136 | { 137 | range: { field => values } 138 | } 139 | end 140 | end 141 | 142 | # Handles location (currently only uses SFO location) 143 | def search_location(squery, options) 144 | distance = options[:distance] 145 | location = Zipcode.latlon(options[:zip]) 146 | 147 | if distance && !distance.empty? 148 | # default to miles if no distance given 149 | unit = distance[-2..-1] 150 | distance = "#{distance}mi" if unit != "km" and unit != "mi" 151 | 152 | squery = squery.geo('location', distance: distance, lat: location[:lat], lng: location[:lon]) 153 | end 154 | squery 155 | end 156 | end 157 | end 158 | end 159 | -------------------------------------------------------------------------------- /spec/lib/data_magic/config_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe DataMagic::Config do 4 | before(:all) do 5 | ENV['DATA_PATH'] = './spec/fixtures/import_with_dictionary' 6 | end 7 | 8 | it "detects data.yml files" do 9 | ENV['DATA_PATH'] = './spec/fixtures/cities_with_yml' 10 | config = DataMagic::Config.new 11 | expect(config.data["api"]).to eq("cities") 12 | end 13 | 14 | describe 'slugification' do 15 | it 'slugifies local paths' do 16 | config = DataMagic::Config.new 17 | slugified = config.clean_index('path/to/my_directory') 18 | expect(slugified).to eq('my-directory') 19 | end 20 | 21 | it 'slugifes s3 bucket names' do 22 | config = DataMagic::Config.new 23 | slugified = config.clean_index('s3://user:pass@my_bucket') 24 | expect(slugified).to eq('my-bucket') 25 | end 26 | end 27 | 28 | context "s3" do 29 | it "detects data.yaml" do 30 | ENV['DATA_PATH'] = 's3://mybucket' 31 | fake_s3 = class_spy("Fake Aws::S3::Client") 32 | 
fake_get_object_response = double( 33 | "S3 response", 34 | body: StringIO.new({ 'index' => 'fake-index' }.to_yaml), 35 | isOK: true, 36 | status: 200 37 | ) 38 | allow(fake_s3).to receive(:get_object) 39 | .with(bucket: 'mybucket', key: 'data.yaml', response_target: duck_type(:read)) 40 | .and_return(fake_get_object_response) 41 | config = DataMagic::Config.new(s3: fake_s3) 42 | expect(config.s3).to eq(fake_s3) 43 | expect(config.data["index"]).to eq("fake-index") 44 | end 45 | 46 | it "raises error if s3 errors" do 47 | ENV['DATA_PATH'] = 's3://mybucket' 48 | fake_s3 = class_spy("Fake Aws::S3::Client") 49 | 50 | allow(fake_s3).to receive(:get_object) 51 | .with(bucket: 'mybucket', key: 'data.yaml', response_target: duck_type(:read)) 52 | .and_raise(RuntimeError) 53 | expect { 54 | DataMagic::Config.new(s3: fake_s3) 55 | }.to raise_error(RuntimeError) 56 | end 57 | 58 | end 59 | 60 | context "create" do 61 | it "works with zero args" do 62 | expect(DataMagic::Config.new).to_not be_nil 63 | end 64 | it "can set s3 client" do 65 | # TODO: mock s3 66 | s3_client = "s3 client" 67 | config = DataMagic::Config.new(s3: s3_client) 68 | expect(config.s3).to eq(s3_client) 69 | end 70 | end 71 | 72 | context "when loaded" do 73 | let(:config) { DataMagic::Config.new } 74 | 75 | after do 76 | config.clear_all 77 | end 78 | 79 | context "#scoped_index_name" do 80 | it "includes environment prefix" do 81 | expect(config.scoped_index_name).to eq('test-city-data') 82 | end 83 | end 84 | 85 | it "has config data" do 86 | default_config = { 87 | "version" => "cities100-2010", 88 | "index" => "city-data", "api" => "cities", 89 | "files" => [{ "name" => "cities100.csv" }], 90 | "options" => {:search=>"dictionary_only"}, 91 | "unique" => ["name"], 92 | "data_path" => "./sample-data" 93 | } 94 | expect(config.data.keys).to include('dictionary') 95 | dictionary = config.data.delete 'dictionary' 96 | 97 | expect(dictionary.keys.sort).to eq %w(id code name state population 98 | 
location.lat location.lon land_area area.water).sort 99 | categories = config.data.delete 'categories' 100 | expect(categories.keys.sort).to eq %w(general general2 general3 general4 general5 geographic).sort 101 | expect(config.data).to eq(default_config) 102 | end 103 | 104 | it "has default page size" do 105 | expect(DataMagic::DEFAULT_PAGE_SIZE).to_not be_nil 106 | expect(config.page_size).to eq(DataMagic::DEFAULT_PAGE_SIZE) 107 | end 108 | 109 | describe "#update_indexed_config" do # rename ... or do this in load_config or something 110 | context "after loading config" do 111 | let(:fixture_path) { "./spec/fixtures/import_with_dictionary" } 112 | before do 113 | config.load_datayaml(fixture_path) 114 | end 115 | it "should be true" do 116 | expect(config.update_indexed_config).to be true 117 | end 118 | it "should set new data_path" do 119 | expect(config.data_path).to eq(fixture_path) 120 | end 121 | 122 | it "twice should be false" do 123 | config.update_indexed_config 124 | expect(config.update_indexed_config).to be false 125 | end 126 | end 127 | end 128 | 129 | describe "when has a custom null_value" do 130 | it 'should have a default null value' do 131 | expect(config.null_value).to eq('NULL') 132 | end 133 | 134 | it 'should set null value field' do 135 | config.load_datayaml("./spec/fixtures/import_with_null_value") 136 | expect(config.null_value).to eq('abc123') 137 | end 138 | end 139 | end 140 | 141 | context ".calculated_field_list" do 142 | let(:config) { DataMagic::Config.new(load_datayaml: false) } 143 | it "finds fields with 'calculate' property" do 144 | allow(config).to receive(:dictionary).and_return( 145 | { 146 | one: { 147 | source: 'column1', 148 | type: 'float' 149 | }, 150 | two: { 151 | source: 'column2', 152 | type: 'float' 153 | }, 154 | all: { 155 | calculate: 'column1 or column2', 156 | type: 'float', 157 | description: 'something' 158 | } 159 | } 160 | ) 161 | expect(config.calculated_field_list).to eq(['all']) 162 | end 163 | end 
164 | 165 | context ".only_field_list" do 166 | let(:config) { DataMagic::Config.new(load_datayaml: false) } 167 | let(:simple_fields) do 168 | { 'one' => 'column1', 'two' => 'column2', 'three' => 'column3' } 169 | end 170 | let(:fields_with_dots) do 171 | { 'one' => 'column1', 'two.a' => 'column2a', 'two.b' => 'column2b' } 172 | end 173 | 174 | it "selects a subset" do 175 | expect(config.only_field_list(%w(one two), simple_fields)).to eq( 176 | 'one' => 'column1', 'two' => 'column2' 177 | ) 178 | end 179 | 180 | it "selects fields with dots" do 181 | expect(config.only_field_list(%w(two), fields_with_dots)).to eq( 182 | 'two.a' => 'column2a', 'two.b' => 'column2b' 183 | ) 184 | end 185 | end 186 | end 187 | -------------------------------------------------------------------------------- /spec/fixtures/cities_with_yml/cities51-100.csv: -------------------------------------------------------------------------------- 1 | "USPS","GEOID","ANSICODE","NAME","LSAD","FUNCSTAT","POP10","HU10","ALAND","AWATER","ALAND_SQMI","AWATER_SQMI","INTPTLAT","INTPTLONG" 2 | "TX","4804000","02409731","Arlington","25","A","365438","144805","248332497","9690024","95.882","3.741","32.700708","-97.124691" 3 | "CA","0603526","02409774","Bakersfield","25","A","347483","120725","368204317","3741691","142.164","1.445","35.321213","-119.018291" 4 | "LA","2255000","00545142","New Orleans","25","A","343829","189896","438803381","468240430","169.423","180.789","30.068636","-89.939007" 5 | "HI","1571550","02630783","Urban Honolulu CDP","57","S","337256","143173","156748036","20484151","60.521","7.909","21.325852","-157.845315" 6 | "CA","0602000","02409704","Anaheim","25","A","336265","104237","129073275","2526668","49.835","0.976","33.855497","-117.760071" 7 | "FL","1271000","02405568","Tampa","25","A","335709","157130","293727878","160127838","113.409","61.826","27.970086","-82.479673" 8 | 
"CO","0804000","02409757","Aurora","25","A","325078","131040","400759192","1806832","154.734","0.698","39.688002","-104.689740" 9 | "CA","0669000","02411814","Santa Ana","25","A","324528","76896","70627761","643479","27.270","0.248","33.736478","-117.882593" 10 | "MO","2965000","00767557","St. Louis","25","A","319294","176002","160343174","10683076","61.909","4.125","38.635699","-90.244582" 11 | "PA","4261000","01214818","Pittsburgh","25","A","305704","156165","143399923","7693613","55.367","2.971","40.439753","-79.976592" 12 | "TX","4817000","02410234","Corpus Christi","25","A","305215","125469","415982136","852055055","160.612","328.980","27.754252","-97.173385" 13 | "CA","0662000","02410965","Riverside","25","A","303871","98444","210152356","788400","81.140","0.304","33.938143","-117.393168" 14 | "OH","3915000","01086201","Cincinnati","25","A","296943","161095","201869928","4155439","77.942","1.604","39.139902","-84.506446" 15 | "KY","2146027","02405089","Lexington-Fayette urban county","UC","A","295803","135160","734648526","4922803","283.649","1.901","38.040157","-84.458443" 16 | "AK","0203000","02419025","Anchorage municipality","37","A","291826","113032","4415108963","663860984","1704.683","256.318","61.177549","-149.274354" 17 | "CA","0675000","02411987","Stockton","25","A","291707","99637","159723404","7984682","61.670","3.083","37.976342","-121.313304" 18 | "OH","3977000","01086537","Toledo","25","A","287208","138039","208991246","8889079","80.692","3.432","41.664071","-83.581861" 19 | "MN","2758000","02396511","St. 
Paul","25","A","285068","120795","134623737","10875208","51.979","4.199","44.948869","-93.103855" 20 | "NJ","3451000","00885317","Newark","25","A","277140","109520","62643850","4972876","24.187","1.920","40.724220","-74.172574" 21 | "NC","3728000","02403745","Greensboro","25","A","269666","124074","327673360","13690607","126.515","5.286","36.096483","-79.827108" 22 | "NY","3611000","00978764","Buffalo","25","A","261310","133444","104594197","31364094","40.384","12.110","42.892492","-78.859686" 23 | "TX","4858016","02411437","Plano","25","A","259841","103672","185394655","937663","71.581","0.362","33.050769","-96.747944" 24 | "NE","3128000","02395713","Lincoln","25","A","258379","110546","230804010","3229386","89.114","1.247","40.808957","-96.680354" 25 | "NV","3231900","02410741","Henderson","25","A","257729","113586","279023542","0","107.732","0.000","36.012233","-115.037462" 26 | "IN","1825000","02394798","Fort Wayne","25","A","253691","113541","286500436","553423","110.618","0.214","41.088173","-85.143880" 27 | "NJ","3436000","00885264","Jersey","25","A","247597","108720","38315542","16280557","14.794","6.286","40.711417","-74.064760" 28 | "FL","1263000","02405401","St. 
Petersburg","25","A","244769","129401","159909751","196473878","61.742","75.859","27.761976","-82.644055" 29 | "CA","0613392","02409461","Chula Vista","25","A","243916","79416","128544675","6380068","49.631","2.463","32.627670","-117.015170" 30 | "VA","5157000","01498557","Norfolk","25","A","242803","95018","140171293","109376999","54.120","42.231","36.923015","-76.244641" 31 | "FL","1253000","02404443","Orlando","25","A","238300","121254","265203107","21469603","102.395","8.289","28.415886","-81.298750" 32 | "AZ","0412000","02409433","Chandler","25","A","236123","94404","166828220","289715","64.413","0.112","33.282874","-111.854943" 33 | "TX","4841464","02411626","Laredo","25","A","236091","68610","230271380","3754983","88.908","1.450","27.547681","-99.486931" 34 | "WI","5548000","01583625","Madison","25","A","233209","108843","198882058","44658619","76.789","17.243","43.087806","-89.430121" 35 | "NC","3775000","02405771","Winston-Salem","25","A","229617","103974","343041264","3228612","132.449","1.247","36.103262","-80.260578" 36 | "TX","4845000","02410892","Lubbock","25","A","229573","95926","317041399","2962034","122.410","1.144","33.566479","-101.886677" 37 | "LA","2205000","02403821","Baton Rouge","25","B","229493","100801","199291656","5588234","76.947","2.158","30.448454","-91.125899" 38 | "NC","3719000","02403521","Durham","25","A","228330","103221","278087581","2357401","107.370","0.910","35.980964","-78.905647" 39 | "TX","4829000","02410572","Garland","25","A","226876","80834","147848881","340126","57.085","0.131","32.909826","-96.630357" 40 | "AZ","0427820","02410596","Glendale","25","A","226721","90505","155337275","401624","59.976","0.155","33.533111","-112.189901" 41 | "NV","3260600","02410923","Reno","25","A","225221","102582","266792840","7423507","103.009","2.866","39.474487","-119.776538" 42 | "FL","1230000","02404689","Hialeah","25","A","224669","74067","55554697","3599730","21.450","1.390","25.869941","-80.302865" 43 | 
"NV","3254600","02409023","Paradise CDP","57","S","223167","114296","120996826","0","46.717","0.000","36.080689","-115.136839" 44 | "VA","5116000","01498558","Chesapeake","25","A","222209","83196","882669156","26052854","340.800","10.059","36.679376","-76.301788" 45 | "AZ","0465000","02411845","Scottsdale","25","A","217385","124001","476350341","1231086","183.920","0.475","33.668727","-111.823682" 46 | "NV","3251800","02411273","North Las Vegas","25","A","216961","76073","262483131","112001","101.345","0.043","36.282974","-115.089262" 47 | "TX","4837000","02410117","Irving","25","A","216290","91128","173573892","2594600","67.017","1.002","32.857748","-96.970022" 48 | "CA","0626000","02410545","Fremont","25","A","214089","73989","200617968","26291598","77.459","10.151","37.494373","-121.941117" 49 | "CA","0636770","02410116","Irvine","25","A","212375","83899","171214072","900908","66.106","0.348","33.678399","-117.771254" 50 | "AL","0107000","02403868","Birmingham","25","A","212237","108981","378310927","6590665","146.067","2.545","33.527444","-86.799047" 51 | "NY","3663000","00979426","Rochester","25","A","210565","97158","92671789","3558427","35.781","1.374","43.169927","-77.616891" 52 | -------------------------------------------------------------------------------- /spec/fixtures/cities_without_yml/cities51-100.csv: -------------------------------------------------------------------------------- 1 | "USPS","GEOID","ANSICODE","NAME","LSAD","FUNCSTAT","POP10","HU10","ALAND","AWATER","ALAND_SQMI","AWATER_SQMI","INTPTLAT","INTPTLONG" 2 | "TX","4804000","02409731","Arlington","25","A","365438","144805","248332497","9690024","95.882","3.741","32.700708","-97.124691" 3 | "CA","0603526","02409774","Bakersfield","25","A","347483","120725","368204317","3741691","142.164","1.445","35.321213","-119.018291" 4 | "LA","2255000","00545142","New Orleans","25","A","343829","189896","438803381","468240430","169.423","180.789","30.068636","-89.939007" 5 | 
"HI","1571550","02630783","Urban Honolulu CDP","57","S","337256","143173","156748036","20484151","60.521","7.909","21.325852","-157.845315" 6 | "CA","0602000","02409704","Anaheim","25","A","336265","104237","129073275","2526668","49.835","0.976","33.855497","-117.760071" 7 | "FL","1271000","02405568","Tampa","25","A","335709","157130","293727878","160127838","113.409","61.826","27.970086","-82.479673" 8 | "CO","0804000","02409757","Aurora","25","A","325078","131040","400759192","1806832","154.734","0.698","39.688002","-104.689740" 9 | "CA","0669000","02411814","Santa Ana","25","A","324528","76896","70627761","643479","27.270","0.248","33.736478","-117.882593" 10 | "MO","2965000","00767557","St. Louis","25","A","319294","176002","160343174","10683076","61.909","4.125","38.635699","-90.244582" 11 | "PA","4261000","01214818","Pittsburgh","25","A","305704","156165","143399923","7693613","55.367","2.971","40.439753","-79.976592" 12 | "TX","4817000","02410234","Corpus Christi","25","A","305215","125469","415982136","852055055","160.612","328.980","27.754252","-97.173385" 13 | "CA","0662000","02410965","Riverside","25","A","303871","98444","210152356","788400","81.140","0.304","33.938143","-117.393168" 14 | "OH","3915000","01086201","Cincinnati","25","A","296943","161095","201869928","4155439","77.942","1.604","39.139902","-84.506446" 15 | "KY","2146027","02405089","Lexington-Fayette urban county","UC","A","295803","135160","734648526","4922803","283.649","1.901","38.040157","-84.458443" 16 | "AK","0203000","02419025","Anchorage municipality","37","A","291826","113032","4415108963","663860984","1704.683","256.318","61.177549","-149.274354" 17 | "CA","0675000","02411987","Stockton","25","A","291707","99637","159723404","7984682","61.670","3.083","37.976342","-121.313304" 18 | "OH","3977000","01086537","Toledo","25","A","287208","138039","208991246","8889079","80.692","3.432","41.664071","-83.581861" 19 | "MN","2758000","02396511","St. 
Paul","25","A","285068","120795","134623737","10875208","51.979","4.199","44.948869","-93.103855" 20 | "NJ","3451000","00885317","Newark","25","A","277140","109520","62643850","4972876","24.187","1.920","40.724220","-74.172574" 21 | "NC","3728000","02403745","Greensboro","25","A","269666","124074","327673360","13690607","126.515","5.286","36.096483","-79.827108" 22 | "NY","3611000","00978764","Buffalo","25","A","261310","133444","104594197","31364094","40.384","12.110","42.892492","-78.859686" 23 | "TX","4858016","02411437","Plano","25","A","259841","103672","185394655","937663","71.581","0.362","33.050769","-96.747944" 24 | "NE","3128000","02395713","Lincoln","25","A","258379","110546","230804010","3229386","89.114","1.247","40.808957","-96.680354" 25 | "NV","3231900","02410741","Henderson","25","A","257729","113586","279023542","0","107.732","0.000","36.012233","-115.037462" 26 | "IN","1825000","02394798","Fort Wayne","25","A","253691","113541","286500436","553423","110.618","0.214","41.088173","-85.143880" 27 | "NJ","3436000","00885264","Jersey","25","A","247597","108720","38315542","16280557","14.794","6.286","40.711417","-74.064760" 28 | "FL","1263000","02405401","St. 
Petersburg","25","A","244769","129401","159909751","196473878","61.742","75.859","27.761976","-82.644055" 29 | "CA","0613392","02409461","Chula Vista","25","A","243916","79416","128544675","6380068","49.631","2.463","32.627670","-117.015170" 30 | "VA","5157000","01498557","Norfolk","25","A","242803","95018","140171293","109376999","54.120","42.231","36.923015","-76.244641" 31 | "FL","1253000","02404443","Orlando","25","A","238300","121254","265203107","21469603","102.395","8.289","28.415886","-81.298750" 32 | "AZ","0412000","02409433","Chandler","25","A","236123","94404","166828220","289715","64.413","0.112","33.282874","-111.854943" 33 | "TX","4841464","02411626","Laredo","25","A","236091","68610","230271380","3754983","88.908","1.450","27.547681","-99.486931" 34 | "WI","5548000","01583625","Madison","25","A","233209","108843","198882058","44658619","76.789","17.243","43.087806","-89.430121" 35 | "NC","3775000","02405771","Winston-Salem","25","A","229617","103974","343041264","3228612","132.449","1.247","36.103262","-80.260578" 36 | "TX","4845000","02410892","Lubbock","25","A","229573","95926","317041399","2962034","122.410","1.144","33.566479","-101.886677" 37 | "LA","2205000","02403821","Baton Rouge","25","B","229493","100801","199291656","5588234","76.947","2.158","30.448454","-91.125899" 38 | "NC","3719000","02403521","Durham","25","A","228330","103221","278087581","2357401","107.370","0.910","35.980964","-78.905647" 39 | "TX","4829000","02410572","Garland","25","A","226876","80834","147848881","340126","57.085","0.131","32.909826","-96.630357" 40 | "AZ","0427820","02410596","Glendale","25","A","226721","90505","155337275","401624","59.976","0.155","33.533111","-112.189901" 41 | "NV","3260600","02410923","Reno","25","A","225221","102582","266792840","7423507","103.009","2.866","39.474487","-119.776538" 42 | "FL","1230000","02404689","Hialeah","25","A","224669","74067","55554697","3599730","21.450","1.390","25.869941","-80.302865" 43 | 
"NV","3254600","02409023","Paradise CDP","57","S","223167","114296","120996826","0","46.717","0.000","36.080689","-115.136839" 44 | "VA","5116000","01498558","Chesapeake","25","A","222209","83196","882669156","26052854","340.800","10.059","36.679376","-76.301788" 45 | "AZ","0465000","02411845","Scottsdale","25","A","217385","124001","476350341","1231086","183.920","0.475","33.668727","-111.823682" 46 | "NV","3251800","02411273","North Las Vegas","25","A","216961","76073","262483131","112001","101.345","0.043","36.282974","-115.089262" 47 | "TX","4837000","02410117","Irving","25","A","216290","91128","173573892","2594600","67.017","1.002","32.857748","-96.970022" 48 | "CA","0626000","02410545","Fremont","25","A","214089","73989","200617968","26291598","77.459","10.151","37.494373","-121.941117" 49 | "CA","0636770","02410116","Irvine","25","A","212375","83899","171214072","900908","66.106","0.348","33.678399","-117.771254" 50 | "AL","0107000","02403868","Birmingham","25","A","212237","108981","378310927","6590665","146.067","2.545","33.527444","-86.799047" 51 | "NY","3663000","00979426","Rochester","25","A","210565","97158","92671789","3558427","35.781","1.374","43.169927","-77.616891" 52 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_dictionary/cities51-100.csv: -------------------------------------------------------------------------------- 1 | "USPS","GEOID","ANSICODE","NAME","LSAD","FUNCSTAT","POP10","HU10","ALAND","AWATER","ALAND_SQMI","AWATER_SQMI","INTPTLAT","INTPTLONG" 2 | "TX","4804000","02409731","Arlington","25","A","365438","144805","248332497","9690024","95.882","3.741","32.700708","-97.124691" 3 | "CA","0603526","02409774","Bakersfield","25","A","347483","120725","368204317","3741691","142.164","1.445","35.321213","-119.018291" 4 | "LA","2255000","00545142","New Orleans","25","A","343829","189896","438803381","468240430","169.423","180.789","30.068636","-89.939007" 5 | 
"HI","1571550","02630783","Urban Honolulu CDP","57","S","337256","143173","156748036","20484151","60.521","7.909","21.325852","-157.845315" 6 | "CA","0602000","02409704","Anaheim","25","A","336265","104237","129073275","2526668","49.835","0.976","33.855497","-117.760071" 7 | "FL","1271000","02405568","Tampa","25","A","335709","157130","293727878","160127838","113.409","61.826","27.970086","-82.479673" 8 | "CO","0804000","02409757","Aurora","25","A","325078","131040","400759192","1806832","154.734","0.698","39.688002","-104.689740" 9 | "CA","0669000","02411814","Santa Ana","25","A","324528","76896","70627761","643479","27.270","0.248","33.736478","-117.882593" 10 | "MO","2965000","00767557","St. Louis","25","A","319294","176002","160343174","10683076","61.909","4.125","38.635699","-90.244582" 11 | "PA","4261000","01214818","Pittsburgh","25","A","305704","156165","143399923","7693613","55.367","2.971","40.439753","-79.976592" 12 | "TX","4817000","02410234","Corpus Christi","25","A","305215","125469","415982136","852055055","160.612","328.980","27.754252","-97.173385" 13 | "CA","0662000","02410965","Riverside","25","A","303871","98444","210152356","788400","81.140","0.304","33.938143","-117.393168" 14 | "OH","3915000","01086201","Cincinnati","25","A","296943","161095","201869928","4155439","77.942","1.604","39.139902","-84.506446" 15 | "KY","2146027","02405089","Lexington-Fayette urban county","UC","A","295803","135160","734648526","4922803","283.649","1.901","38.040157","-84.458443" 16 | "AK","0203000","02419025","Anchorage municipality","37","A","291826","113032","4415108963","663860984","1704.683","256.318","61.177549","-149.274354" 17 | "CA","0675000","02411987","Stockton","25","A","291707","99637","159723404","7984682","61.670","3.083","37.976342","-121.313304" 18 | "OH","3977000","01086537","Toledo","25","A","287208","138039","208991246","8889079","80.692","3.432","41.664071","-83.581861" 19 | "MN","2758000","02396511","St. 
Paul","25","A","285068","120795","134623737","10875208","51.979","4.199","44.948869","-93.103855" 20 | "NJ","3451000","00885317","Newark","25","A","277140","109520","62643850","4972876","24.187","1.920","40.724220","-74.172574" 21 | "NC","3728000","02403745","Greensboro","25","A","269666","124074","327673360","13690607","126.515","5.286","36.096483","-79.827108" 22 | "NY","3611000","00978764","Buffalo","25","A","261310","133444","104594197","31364094","40.384","12.110","42.892492","-78.859686" 23 | "TX","4858016","02411437","Plano","25","A","259841","103672","185394655","937663","71.581","0.362","33.050769","-96.747944" 24 | "NE","3128000","02395713","Lincoln","25","A","258379","110546","230804010","3229386","89.114","1.247","40.808957","-96.680354" 25 | "NV","3231900","02410741","Henderson","25","A","257729","113586","279023542","0","107.732","0.000","36.012233","-115.037462" 26 | "IN","1825000","02394798","Fort Wayne","25","A","253691","113541","286500436","553423","110.618","0.214","41.088173","-85.143880" 27 | "NJ","3436000","00885264","Jersey","25","A","247597","108720","38315542","16280557","14.794","6.286","40.711417","-74.064760" 28 | "FL","1263000","02405401","St. 
Petersburg","25","A","244769","129401","159909751","196473878","61.742","75.859","27.761976","-82.644055" 29 | "CA","0613392","02409461","Chula Vista","25","A","243916","79416","128544675","6380068","49.631","2.463","32.627670","-117.015170" 30 | "VA","5157000","01498557","Norfolk","25","A","242803","95018","140171293","109376999","54.120","42.231","36.923015","-76.244641" 31 | "FL","1253000","02404443","Orlando","25","A","238300","121254","265203107","21469603","102.395","8.289","28.415886","-81.298750" 32 | "AZ","0412000","02409433","Chandler","25","A","236123","94404","166828220","289715","64.413","0.112","33.282874","-111.854943" 33 | "TX","4841464","02411626","Laredo","25","A","236091","68610","230271380","3754983","88.908","1.450","27.547681","-99.486931" 34 | "WI","5548000","01583625","Madison","25","A","233209","108843","198882058","44658619","76.789","17.243","43.087806","-89.430121" 35 | "NC","3775000","02405771","Winston-Salem","25","A","229617","103974","343041264","3228612","132.449","1.247","36.103262","-80.260578" 36 | "TX","4845000","02410892","Lubbock","25","A","229573","95926","317041399","2962034","122.410","1.144","33.566479","-101.886677" 37 | "LA","2205000","02403821","Baton Rouge","25","B","229493","100801","199291656","5588234","76.947","2.158","30.448454","-91.125899" 38 | "NC","3719000","02403521","Durham","25","A","228330","103221","278087581","2357401","107.370","0.910","35.980964","-78.905647" 39 | "TX","4829000","02410572","Garland","25","A","226876","80834","147848881","340126","57.085","0.131","32.909826","-96.630357" 40 | "AZ","0427820","02410596","Glendale","25","A","226721","90505","155337275","401624","59.976","0.155","33.533111","-112.189901" 41 | "NV","3260600","02410923","Reno","25","A","225221","102582","266792840","7423507","103.009","2.866","39.474487","-119.776538" 42 | "FL","1230000","02404689","Hialeah","25","A","224669","74067","55554697","3599730","21.450","1.390","25.869941","-80.302865" 43 | 
"NV","3254600","02409023","Paradise CDP","57","S","223167","114296","120996826","0","46.717","0.000","36.080689","-115.136839" 44 | "VA","5116000","01498558","Chesapeake","25","A","222209","83196","882669156","26052854","340.800","10.059","36.679376","-76.301788" 45 | "AZ","0465000","02411845","Scottsdale","25","A","217385","124001","476350341","1231086","183.920","0.475","33.668727","-111.823682" 46 | "NV","3251800","02411273","North Las Vegas","25","A","216961","76073","262483131","112001","101.345","0.043","36.282974","-115.089262" 47 | "TX","4837000","02410117","Irving","25","A","216290","91128","173573892","2594600","67.017","1.002","32.857748","-96.970022" 48 | "CA","0626000","02410545","Fremont","25","A","214089","73989","200617968","26291598","77.459","10.151","37.494373","-121.941117" 49 | "CA","0636770","02410116","Irvine","25","A","212375","83899","171214072","900908","66.106","0.348","33.678399","-117.771254" 50 | "AL","0107000","02403868","Birmingham","25","A","212237","108981","378310927","6590665","146.067","2.545","33.527444","-86.799047" 51 | "NY","3663000","00979426","Rochester","25","A","210565","97158","92671789","3558427","35.781","1.374","43.169927","-77.616891" 52 | -------------------------------------------------------------------------------- /spec/fixtures/cities_with_yml/cities50.csv: -------------------------------------------------------------------------------- 1 | "USPS","GEOID","ANSICODE","NAME","LSAD","FUNCSTAT","POP10","HU10","ALAND","AWATER","ALAND_SQMI","AWATER_SQMI","INTPTLAT","INTPTLONG" 2 | "NY","3651000","02395220","New York","25","A","8175133","3371062","783842402","429527437","302.643","165.841","40.664274","-73.938500" 3 | "CA","0644000","02410877","Los Angeles","25","A","3792621","1413995","1213850147","88119442","468.670","34.023","34.019394","-118.410825" 4 | "IL","1714000","00428803","Chicago","25","A","2695598","1194337","589571105","16781658","227.635","6.479","41.837551","-87.681844" 5 | 
"TX","4835000","02410796","Houston","25","A","2099451","892646","1552929379","72277296","599.589","27.906","29.780472","-95.386342" 6 | "PA","4260000","01215531","Philadelphia","25","A","1526006","670171","347321129","22289408","134.101","8.606","40.009376","-75.133346" 7 | "AZ","0455000","02411414","Phoenix","25","A","1445632","590149","1338256106","3221362","516.704","1.244","33.572162","-112.087966" 8 | "TX","4865000","02411774","San Antonio","25","A","1327407","524246","1193811736","14968532","460.933","5.779","29.472403","-98.525142" 9 | "CA","0666000","02411782","San Diego","25","A","1307402","516033","842233444","122273028","325.188","47.210","32.815300","-117.134993" 10 | "TX","4819000","02410288","Dallas","25","A","1197816","516639","881939036","117394747","340.519","45.326","32.794176","-96.765503" 11 | "CA","0668000","02411790","San Jose","25","A","945942","314038","457201438","8907829","176.526","3.439","37.296867","-121.819306" 12 | "FL","1235000","02404783","Jacksonville","25","A","821784","366273","1934729255","330550113","747.003","127.626","30.337019","-81.661302" 13 | "IN","1836003","02395424","Indianapolis (balance)","00","F","820445","379856","936107645","17072644","361.433","6.592","39.776664","-86.145935" 14 | "CA","0667000","02411786","San Francisco","25","A","805235","376942","121399963","479190317","46.873","185.016","37.727239","-123.032229" 15 | "TX","4805000","02409761","Austin","25","A","790390","354241","771546901","18560605","297.896","7.166","30.307182","-97.755996" 16 | "OH","3918000","01086101","Columbus","25","A","787033","370965","562466164","15373425","217.169","5.936","39.984799","-82.985044" 17 | "TX","4827000","02410531","Fort Worth","25","A","741206","291086","880127783","20832365","339.819","8.043","32.779542","-97.346335" 18 | "NC","3712000","02404032","Charlotte","25","A","731424","319918","770983559","5148533","297.678","1.988","35.208707","-80.830739" 19 | 
"MI","2622000","01626181","Detroit","25","A","713777","349170","359361795","10667148","138.750","4.119","42.383037","-83.102237" 20 | "TX","4824000","02410414","El Paso","25","A","649121","227605","661056631","2642037","255.235","1.020","31.848360","-106.426979" 21 | "TN","4748000","02405068","Memphis","25","A","646889","291883","815988030","23176952","315.055","8.949","35.103543","-89.978498" 22 | "MD","2404000","01702381","Baltimore","25","A","620961","296685","209643241","28768302","80.944","11.108","39.300213","-76.610516" 23 | "MA","2507000","00619463","Boston","25","A","617594","272481","125037462","107130299","48.277","41.363","42.331960","-71.020173" 24 | "WA","5363000","02411856","Seattle","25","A","608660","308516","217410345","152055857","83.943","58.709","47.620499","-122.350876" 25 | "DC","1150000","02390665","Washington","25","N","601723","296719","158114680","18884970","61.048","7.292","38.904149","-77.017094" 26 | "TN","4752006","02405092","Nashville-Davidson metropolitan government (balance)","00","F","601222","272622","1230569857","56261385","475.126","21.723","36.171800","-86.785002" 27 | "CO","0820000","02410324","Denver","25","A","600158","285797","396268588","4225359","153.000","1.631","39.761849","-104.880625" 28 | "KY","2148006","01967434","Louisville/Jefferson County metro government (balance)","00","F","597337","270928","842389102","43938966","325.248","16.965","38.178077","-85.666708" 29 | "WI","5553000","01583724","Milwaukee","25","A","594833","255569","248955419","1755108","96.122","0.678","43.063348","-87.966695" 30 | "OR","4159000","02411471","Portland","25","A","583776","265439","345573252","30196598","133.427","11.659","45.536951","-122.649971" 31 | "NV","3240000","02411630","Las Vegas","25","A","583756","243701","351758506","133986","135.815","0.052","36.227712","-115.264045" 32 | "OK","4055000","02411311","Oklahoma","25","A","579999","256930","1570595903","37324163","606.410","14.411","35.467079","-97.513657" 33 | 
"NM","3502000","02409678","Albuquerque","25","A","545852","239166","486218256","4702696","187.730","1.816","35.105552","-106.647388" 34 | "AZ","0477000","02412104","Tucson","25","A","520116","229762","587173307","842214","226.709","0.325","32.154289","-110.871062" 35 | "CA","0627000","02410546","Fresno","25","A","494665","171288","289966538","910373","111.957","0.351","36.782674","-119.794492" 36 | "CA","0664000","02411751","Sacramento","25","A","466488","190911","253599699","5673097","97.915","2.190","38.566592","-121.468632" 37 | "CA","0643000","02410866","Long Beach","25","A","462257","176032","130259313","2963688","50.293","1.144","33.809102","-118.155327" 38 | "MO","2938000","02395492","Kansas","25","A","459787","221860","815717156","10575319","314.950","4.083","39.125212","-94.551136" 39 | "AZ","0446000","02411087","Mesa","25","A","439041","201173","353409966","1580758","136.452","0.610","33.401926","-111.717379" 40 | "VA","5182000","01498559","Virginia Beach","25","A","437994","177879","644948896","643150238","249.016","248.322","36.779322","-76.024020" 41 | "GA","1304000","02403126","Atlanta","25","A","420003","224573","344861307","2219186","133.152","0.857","33.762909","-84.422675" 42 | "CO","0816000","02410198","Colorado Springs","25","A","416427","179607","503857525","947180","194.540","0.366","38.867255","-104.760749" 43 | "NE","3137000","02396064","Omaha","25","A","408958","177518","329157346","9036153","127.088","3.489","41.264675","-96.041927" 44 | "NC","3755000","02404590","Raleigh","25","A","403892","176124","370117231","2846320","142.903","1.099","35.830204","-78.641439" 45 | "FL","1245000","02404247","Miami","25","A","399457","183994","92905577","52298641","35.871","20.193","25.775163","-80.208615" 46 | "OH","3916000","01085963","Cleveland","25","A","396815","207536","201234202","12351419","77.697","4.769","41.478138","-81.679486" 47 | 
"OK","4075000","02412110","Tulsa","25","A","391906","185127","509590364","10967865","196.754","4.235","36.127949","-95.902316" 48 | "CA","0653000","02411292","Oakland","25","A","390724","169710","144484543","57539591","55.786","22.216","37.769857","-122.225640" 49 | "MN","2743000","02395345","Minneapolis","25","A","382578","178287","139789184","9052448","53.973","3.495","44.963323","-93.268284" 50 | "KS","2079000","00485662","Wichita","25","A","382368","167310","412571486","11136472","159.295","4.300","37.690694","-97.342678" 51 | "PR","7276770","02414943","San Juan zona urbana","62","S","381931","194316","102305007","17914425","39.500","6.917","18.406409","-66.064004" 52 | -------------------------------------------------------------------------------- /spec/fixtures/cities_without_yml/cities50.csv: -------------------------------------------------------------------------------- 1 | "USPS","GEOID","ANSICODE","NAME","LSAD","FUNCSTAT","POP10","HU10","ALAND","AWATER","ALAND_SQMI","AWATER_SQMI","INTPTLAT","INTPTLONG" 2 | "NY","3651000","02395220","New York","25","A","8175133","3371062","783842402","429527437","302.643","165.841","40.664274","-73.938500" 3 | "CA","0644000","02410877","Los Angeles","25","A","3792621","1413995","1213850147","88119442","468.670","34.023","34.019394","-118.410825" 4 | "IL","1714000","00428803","Chicago","25","A","2695598","1194337","589571105","16781658","227.635","6.479","41.837551","-87.681844" 5 | "TX","4835000","02410796","Houston","25","A","2099451","892646","1552929379","72277296","599.589","27.906","29.780472","-95.386342" 6 | "PA","4260000","01215531","Philadelphia","25","A","1526006","670171","347321129","22289408","134.101","8.606","40.009376","-75.133346" 7 | "AZ","0455000","02411414","Phoenix","25","A","1445632","590149","1338256106","3221362","516.704","1.244","33.572162","-112.087966" 8 | "TX","4865000","02411774","San Antonio","25","A","1327407","524246","1193811736","14968532","460.933","5.779","29.472403","-98.525142" 9 | 
"CA","0666000","02411782","San Diego","25","A","1307402","516033","842233444","122273028","325.188","47.210","32.815300","-117.134993" 10 | "TX","4819000","02410288","Dallas","25","A","1197816","516639","881939036","117394747","340.519","45.326","32.794176","-96.765503" 11 | "CA","0668000","02411790","San Jose","25","A","945942","314038","457201438","8907829","176.526","3.439","37.296867","-121.819306" 12 | "FL","1235000","02404783","Jacksonville","25","A","821784","366273","1934729255","330550113","747.003","127.626","30.337019","-81.661302" 13 | "IN","1836003","02395424","Indianapolis (balance)","00","F","820445","379856","936107645","17072644","361.433","6.592","39.776664","-86.145935" 14 | "CA","0667000","02411786","San Francisco","25","A","805235","376942","121399963","479190317","46.873","185.016","37.727239","-123.032229" 15 | "TX","4805000","02409761","Austin","25","A","790390","354241","771546901","18560605","297.896","7.166","30.307182","-97.755996" 16 | "OH","3918000","01086101","Columbus","25","A","787033","370965","562466164","15373425","217.169","5.936","39.984799","-82.985044" 17 | "TX","4827000","02410531","Fort Worth","25","A","741206","291086","880127783","20832365","339.819","8.043","32.779542","-97.346335" 18 | "NC","3712000","02404032","Charlotte","25","A","731424","319918","770983559","5148533","297.678","1.988","35.208707","-80.830739" 19 | "MI","2622000","01626181","Detroit","25","A","713777","349170","359361795","10667148","138.750","4.119","42.383037","-83.102237" 20 | "TX","4824000","02410414","El Paso","25","A","649121","227605","661056631","2642037","255.235","1.020","31.848360","-106.426979" 21 | "TN","4748000","02405068","Memphis","25","A","646889","291883","815988030","23176952","315.055","8.949","35.103543","-89.978498" 22 | "MD","2404000","01702381","Baltimore","25","A","620961","296685","209643241","28768302","80.944","11.108","39.300213","-76.610516" 23 | 
"MA","2507000","00619463","Boston","25","A","617594","272481","125037462","107130299","48.277","41.363","42.331960","-71.020173" 24 | "WA","5363000","02411856","Seattle","25","A","608660","308516","217410345","152055857","83.943","58.709","47.620499","-122.350876" 25 | "DC","1150000","02390665","Washington","25","N","601723","296719","158114680","18884970","61.048","7.292","38.904149","-77.017094" 26 | "TN","4752006","02405092","Nashville-Davidson metropolitan government (balance)","00","F","601222","272622","1230569857","56261385","475.126","21.723","36.171800","-86.785002" 27 | "CO","0820000","02410324","Denver","25","A","600158","285797","396268588","4225359","153.000","1.631","39.761849","-104.880625" 28 | "KY","2148006","01967434","Louisville/Jefferson County metro government (balance)","00","F","597337","270928","842389102","43938966","325.248","16.965","38.178077","-85.666708" 29 | "WI","5553000","01583724","Milwaukee","25","A","594833","255569","248955419","1755108","96.122","0.678","43.063348","-87.966695" 30 | "OR","4159000","02411471","Portland","25","A","583776","265439","345573252","30196598","133.427","11.659","45.536951","-122.649971" 31 | "NV","3240000","02411630","Las Vegas","25","A","583756","243701","351758506","133986","135.815","0.052","36.227712","-115.264045" 32 | "OK","4055000","02411311","Oklahoma","25","A","579999","256930","1570595903","37324163","606.410","14.411","35.467079","-97.513657" 33 | "NM","3502000","02409678","Albuquerque","25","A","545852","239166","486218256","4702696","187.730","1.816","35.105552","-106.647388" 34 | "AZ","0477000","02412104","Tucson","25","A","520116","229762","587173307","842214","226.709","0.325","32.154289","-110.871062" 35 | "CA","0627000","02410546","Fresno","25","A","494665","171288","289966538","910373","111.957","0.351","36.782674","-119.794492" 36 | "CA","0664000","02411751","Sacramento","25","A","466488","190911","253599699","5673097","97.915","2.190","38.566592","-121.468632" 37 | 
"CA","0643000","02410866","Long Beach","25","A","462257","176032","130259313","2963688","50.293","1.144","33.809102","-118.155327" 38 | "MO","2938000","02395492","Kansas","25","A","459787","221860","815717156","10575319","314.950","4.083","39.125212","-94.551136" 39 | "AZ","0446000","02411087","Mesa","25","A","439041","201173","353409966","1580758","136.452","0.610","33.401926","-111.717379" 40 | "VA","5182000","01498559","Virginia Beach","25","A","437994","177879","644948896","643150238","249.016","248.322","36.779322","-76.024020" 41 | "GA","1304000","02403126","Atlanta","25","A","420003","224573","344861307","2219186","133.152","0.857","33.762909","-84.422675" 42 | "CO","0816000","02410198","Colorado Springs","25","A","416427","179607","503857525","947180","194.540","0.366","38.867255","-104.760749" 43 | "NE","3137000","02396064","Omaha","25","A","408958","177518","329157346","9036153","127.088","3.489","41.264675","-96.041927" 44 | "NC","3755000","02404590","Raleigh","25","A","403892","176124","370117231","2846320","142.903","1.099","35.830204","-78.641439" 45 | "FL","1245000","02404247","Miami","25","A","399457","183994","92905577","52298641","35.871","20.193","25.775163","-80.208615" 46 | "OH","3916000","01085963","Cleveland","25","A","396815","207536","201234202","12351419","77.697","4.769","41.478138","-81.679486" 47 | "OK","4075000","02412110","Tulsa","25","A","391906","185127","509590364","10967865","196.754","4.235","36.127949","-95.902316" 48 | "CA","0653000","02411292","Oakland","25","A","390724","169710","144484543","57539591","55.786","22.216","37.769857","-122.225640" 49 | "MN","2743000","02395345","Minneapolis","25","A","382578","178287","139789184","9052448","53.973","3.495","44.963323","-93.268284" 50 | "KS","2079000","00485662","Wichita","25","A","382368","167310","412571486","11136472","159.295","4.300","37.690694","-97.342678" 51 | "PR","7276770","02414943","San Juan zona 
urbana","62","S","381931","194316","102305007","17914425","39.500","6.917","18.406409","-66.064004" 52 | -------------------------------------------------------------------------------- /spec/fixtures/import_with_dictionary/cities50.csv: -------------------------------------------------------------------------------- 1 | "USPS","GEOID","ANSICODE","NAME","LSAD","FUNCSTAT","POP10","HU10","ALAND","AWATER","ALAND_SQMI","AWATER_SQMI","INTPTLAT","INTPTLONG" 2 | "NY","3651000","02395220","New York","25","A","8175133","3371062","783842402","429527437","302.643","165.841","40.664274","-73.938500" 3 | "CA","0644000","02410877","Los Angeles","25","A","3792621","1413995","1213850147","88119442","468.670","34.023","34.019394","-118.410825" 4 | "IL","1714000","00428803","Chicago","25","A","2695598","1194337","589571105","16781658","227.635","6.479","41.837551","-87.681844" 5 | "TX","4835000","02410796","Houston","25","A","2099451","892646","1552929379","72277296","599.589","27.906","29.780472","-95.386342" 6 | "PA","4260000","01215531","Philadelphia","25","A","1526006","670171","347321129","22289408","134.101","8.606","40.009376","-75.133346" 7 | "AZ","0455000","02411414","Phoenix","25","A","1445632","590149","1338256106","3221362","516.704","1.244","33.572162","-112.087966" 8 | "TX","4865000","02411774","San Antonio","25","A","1327407","524246","1193811736","14968532","460.933","5.779","29.472403","-98.525142" 9 | "CA","0666000","02411782","San Diego","25","A","1307402","516033","842233444","122273028","325.188","47.210","32.815300","-117.134993" 10 | "TX","4819000","02410288","Dallas","25","A","1197816","516639","881939036","117394747","340.519","45.326","32.794176","-96.765503" 11 | "CA","0668000","02411790","San Jose","25","A","945942","314038","457201438","8907829","176.526","3.439","37.296867","-121.819306" 12 | "FL","1235000","02404783","Jacksonville","25","A","821784","366273","1934729255","330550113","747.003","127.626","30.337019","-81.661302" 13 | 
"IN","1836003","02395424","Indianapolis (balance)","00","F","820445","379856","936107645","17072644","361.433","6.592","39.776664","-86.145935" 14 | "CA","0667000","02411786","San Francisco","25","A","805235","376942","121399963","479190317","46.873","185.016","37.727239","-123.032229" 15 | "TX","4805000","02409761","Austin","25","A","790390","354241","771546901","18560605","297.896","7.166","30.307182","-97.755996" 16 | "OH","3918000","01086101","Columbus","25","A","787033","370965","562466164","15373425","217.169","5.936","39.984799","-82.985044" 17 | "TX","4827000","02410531","Fort Worth","25","A","741206","291086","880127783","20832365","339.819","8.043","32.779542","-97.346335" 18 | "NC","3712000","02404032","Charlotte","25","A","731424","319918","770983559","5148533","297.678","1.988","35.208707","-80.830739" 19 | "MI","2622000","01626181","Detroit","25","A","713777","349170","359361795","10667148","138.750","4.119","42.383037","-83.102237" 20 | "TX","4824000","02410414","El Paso","25","A","649121","227605","661056631","2642037","255.235","1.020","31.848360","-106.426979" 21 | "TN","4748000","02405068","Memphis","25","A","646889","291883","815988030","23176952","315.055","8.949","35.103543","-89.978498" 22 | "MD","2404000","01702381","Baltimore","25","A","620961","296685","209643241","28768302","80.944","11.108","39.300213","-76.610516" 23 | "MA","2507000","00619463","Boston","25","A","617594","272481","125037462","107130299","48.277","41.363","42.331960","-71.020173" 24 | "WA","5363000","02411856","Seattle","25","A","608660","308516","217410345","152055857","83.943","58.709","47.620499","-122.350876" 25 | "DC","1150000","02390665","Washington","25","N","601723","296719","158114680","18884970","61.048","7.292","38.904149","-77.017094" 26 | "TN","4752006","02405092","Nashville-Davidson metropolitan government (balance)","00","F","601222","272622","1230569857","56261385","475.126","21.723","36.171800","-86.785002" 27 | 
"CO","0820000","02410324","Denver","25","A","600158","285797","396268588","4225359","153.000","1.631","39.761849","-104.880625" 28 | "KY","2148006","01967434","Louisville/Jefferson County metro government (balance)","00","F","597337","270928","842389102","43938966","325.248","16.965","38.178077","-85.666708" 29 | "WI","5553000","01583724","Milwaukee","25","A","594833","255569","248955419","1755108","96.122","0.678","43.063348","-87.966695" 30 | "OR","4159000","02411471","Portland","25","A","583776","265439","345573252","30196598","133.427","11.659","45.536951","-122.649971" 31 | "NV","3240000","02411630","Las Vegas","25","A","583756","243701","351758506","133986","135.815","0.052","36.227712","-115.264045" 32 | "OK","4055000","02411311","Oklahoma","25","A","579999","256930","1570595903","37324163","606.410","14.411","35.467079","-97.513657" 33 | "NM","3502000","02409678","Albuquerque","25","A","545852","239166","486218256","4702696","187.730","1.816","35.105552","-106.647388" 34 | "AZ","0477000","02412104","Tucson","25","A","520116","229762","587173307","842214","226.709","0.325","32.154289","-110.871062" 35 | "CA","0627000","02410546","Fresno","25","A","494665","171288","289966538","910373","111.957","0.351","36.782674","-119.794492" 36 | "CA","0664000","02411751","Sacramento","25","A","466488","190911","253599699","5673097","97.915","2.190","38.566592","-121.468632" 37 | "CA","0643000","02410866","Long Beach","25","A","462257","176032","130259313","2963688","50.293","1.144","33.809102","-118.155327" 38 | "MO","2938000","02395492","Kansas","25","A","459787","221860","815717156","10575319","314.950","4.083","39.125212","-94.551136" 39 | "AZ","0446000","02411087","Mesa","25","A","439041","201173","353409966","1580758","136.452","0.610","33.401926","-111.717379" 40 | "VA","5182000","01498559","Virginia Beach","25","A","437994","177879","644948896","643150238","249.016","248.322","36.779322","-76.024020" 41 | 
"GA","1304000","02403126","Atlanta","25","A","420003","224573","344861307","2219186","133.152","0.857","33.762909","-84.422675" 42 | "CO","0816000","02410198","Colorado Springs","25","A","416427","179607","503857525","947180","194.540","0.366","38.867255","-104.760749" 43 | "NE","3137000","02396064","Omaha","25","A","408958","177518","329157346","9036153","127.088","3.489","41.264675","-96.041927" 44 | "NC","3755000","02404590","Raleigh","25","A","403892","176124","370117231","2846320","142.903","1.099","35.830204","-78.641439" 45 | "FL","1245000","02404247","Miami","25","A","399457","183994","92905577","52298641","35.871","20.193","25.775163","-80.208615" 46 | "OH","3916000","01085963","Cleveland","25","A","396815","207536","201234202","12351419","77.697","4.769","41.478138","-81.679486" 47 | "OK","4075000","02412110","Tulsa","25","A","391906","185127","509590364","10967865","196.754","4.235","36.127949","-95.902316" 48 | "CA","0653000","02411292","Oakland","25","A","390724","169710","144484543","57539591","55.786","22.216","37.769857","-122.225640" 49 | "MN","2743000","02395345","Minneapolis","25","A","382578","178287","139789184","9052448","53.973","3.495","44.963323","-93.268284" 50 | "KS","2079000","00485662","Wichita","25","A","382368","167310","412571486","11136472","159.295","4.300","37.690694","-97.342678" 51 | "PR","7276770","02414943","San Juan zona urbana","62","S","381931","194316","102305007","17914425","39.500","6.917","18.406409","-66.064004" 52 | -------------------------------------------------------------------------------- /lib/data_magic/index/document_builder.rb: -------------------------------------------------------------------------------- 1 | require './lib/expression/expression' 2 | 3 | module DataMagic 4 | module Index 5 | module DocumentBuilder 6 | class << self 7 | def logger 8 | DataMagic::Config.logger 9 | end 10 | 11 | # build a nested json document from a csv row 12 | # row: a hash { column_name => value } 13 | # where all 
column_names and values are strings 14 | # fields: column_name => field_name 15 | # config: DataMagic.Config instance for dictionary, column types, NULL 16 | def build(row, builder_data, config) 17 | fields = builder_data.new_field_names 18 | options = builder_data.options 19 | additional = builder_data.additional_data 20 | csv_row = map_column_types(row.to_hash, config) 21 | if fields.empty? 22 | field_values = csv_row 23 | else 24 | field_values = map_field_names(csv_row, fields, options) 25 | end 26 | field_values.merge!(calculated_fields(csv_row, config)) 27 | field_values.merge!(lowercase_columns(field_values, config.column_field_types)) 28 | field_values.merge!(additional) if additional 29 | doc = NestedHash.new.add(field_values) 30 | doc = parse_nested(doc, options) if options[:nest] 31 | doc = select_only_fields(doc, options[:only]) unless options[:only].nil? 32 | doc 33 | end 34 | 35 | def create(*args) 36 | Document.new( 37 | build(*args) 38 | ) 39 | end 40 | 41 | private 42 | 43 | def calculated_fields(row, config) 44 | result = {} 45 | config.calculated_field_list.each do |name| 46 | result[name] = calculate(name, row, config.dictionary) 47 | end 48 | result 49 | end 50 | 51 | # row: a hash (keys may be strings or symbols) 52 | # valid_types: an array of allowed types 53 | # field_types: hash field_name : type (float, integer, string) 54 | # returns a hash where values have been coerced to the new type 55 | # TODO: move type validation to config load time instead 56 | def map_column_types(row, config) 57 | valid_types = config.valid_types 58 | null_value = config.null_value || null_value = 'NULL' 59 | 60 | mapped = {} 61 | row.each do |key, value| 62 | if value == null_value 63 | mapped[key] = nil 64 | else 65 | type = config.csv_column_type(key) 66 | if valid_types.include? 
type 67 | mapped[key] = fix_field_type(type, value, key) 68 | else 69 | fail InvalidDictionary, "unexpected type '#{type.inspect}' for field '#{key}'" 70 | end 71 | end 72 | end 73 | mapped 74 | end 75 | 76 | def lowercase_columns(row, field_types = {}) 77 | new_columns = {} 78 | row.each do |key, value| 79 | type = field_types[key.to_sym] || field_types[key.to_s] 80 | new_columns["_#{key}"] = value.downcase if type == "name" || type == "autocomplete" 81 | end 82 | new_columns 83 | end 84 | 85 | def parse_nested(document, options) 86 | new_doc = {} 87 | nest_options = options[:nest] 88 | if nest_options 89 | key = nest_options['key'] 90 | new_doc[key] = {} 91 | new_doc['id'] = document['id'] unless document['id'].nil? 92 | nest_options['contents'].each do |item_key| 93 | new_doc[key][item_key] = document[item_key] 94 | end 95 | end 96 | new_doc 97 | end 98 | 99 | def fix_field_type(type, value, key=nil) 100 | return value if value.nil? 101 | 102 | new_value = case type 103 | when "float" 104 | value.to_f 105 | when "integer" 106 | value.to_i 107 | when "lowercase_name" 108 | value.to_s.downcase 109 | when "boolean" 110 | parse_boolean(value) 111 | else # "string" 112 | value.to_s 113 | end 114 | new_value = value.to_f if key and key.to_s.include? "location" 115 | new_value 116 | end 117 | 118 | def parse_boolean(value) 119 | case value 120 | when "true" 121 | true 122 | when "false" 123 | false 124 | when 0 125 | false 126 | else 127 | !!value 128 | end 129 | end 130 | 131 | # currently we just support 'or' operations on two columns 132 | def calculate(field_name, row, dictionary) 133 | item = dictionary[field_name.to_s] || dictionary[field_name.to_sym] 134 | type = item['type'] || item[:type] 135 | fail "calculate: field not found in dictionary #{field_name.inspect}" if item.nil? 136 | expr = item['calculate'] || item[:calculate] 137 | fail ArgumentError, "expected to calculate #{field_name}" if expr.nil? 
138 | e = Expression.find_or_create(expr) 139 | vars = {} 140 | e.variables.each do |name| 141 | vars[name] = fix_field_type(type, row[name.to_sym]) 142 | end 143 | fix_field_type(type, e.evaluate(vars)) 144 | end 145 | 146 | # row: a hash (keys may be strings or symbols) 147 | # new_fields: hash current_name : new_name 148 | # returns a hash (which may be a subset of row) where keys are new_name 149 | # with value of corresponding row[current_name] 150 | def map_field_names(row, new_fields, options = {}) 151 | mapped = {} 152 | row.each do |key, value| 153 | fail ArgumentError, "column header missing for: #{value}" if key.nil? 154 | new_key = new_fields[key.to_sym] || new_fields[key.to_s] 155 | if new_key 156 | value = value.to_f if new_key.include? "location" 157 | mapped[new_key] = value 158 | elsif options[:columns] == 'all' 159 | mapped[key] = value 160 | end 161 | end 162 | mapped 163 | end 164 | 165 | # select top-level fields from a hash 166 | # if there are name types, also select _name 167 | # doc: hash with string keys 168 | # only_keys: array of keys 169 | def select_only_fields(doc, only_keys) 170 | doc = doc.select do |key, value| 171 | key = key.to_s 172 | # if key has _ prefix, select if key present without _ 173 | key = key[1..-1] if key[0] == '_' 174 | only_keys.include?(key) 175 | end 176 | end 177 | 178 | end # class methods 179 | end # module QueryBuilder 180 | end 181 | end # module DataMagic 182 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | actionview (4.2.3) 5 | activesupport (= 4.2.3) 6 | builder (~> 3.1) 7 | erubis (~> 2.7.0) 8 | rails-dom-testing (~> 1.0, >= 1.0.5) 9 | rails-html-sanitizer (~> 1.0, >= 1.0.2) 10 | activesupport (4.2.3) 11 | i18n (~> 0.7) 12 | json (~> 1.7, >= 1.7.7) 13 | minitest (~> 5.1) 14 | thread_safe (~> 0.3, >= 0.3.4) 15 | tzinfo 
(~> 1.1) 16 | addressable (2.3.8) 17 | autoparse (0.3.3) 18 | addressable (>= 2.3.1) 19 | extlib (>= 0.9.15) 20 | multi_json (>= 1.0.0) 21 | aws-sdk (2.1.11) 22 | aws-sdk-resources (= 2.1.11) 23 | aws-sdk-core (2.1.11) 24 | jmespath (~> 1.0) 25 | aws-sdk-resources (2.1.11) 26 | aws-sdk-core (= 2.1.11) 27 | axiom-types (0.1.1) 28 | descendants_tracker (~> 0.0.4) 29 | ice_nine (~> 0.11.0) 30 | thread_safe (~> 0.3, >= 0.3.1) 31 | blankslate (2.1.2.4) 32 | builder (3.2.2) 33 | byebug (5.0.0) 34 | columnize (= 0.9.0) 35 | cf-app-utils (0.4) 36 | coderay (1.1.0) 37 | coercible (1.0.0) 38 | descendants_tracker (~> 0.0.1) 39 | columnize (0.9.0) 40 | descendants_tracker (0.0.4) 41 | thread_safe (~> 0.3, >= 0.3.1) 42 | diff-lcs (1.2.5) 43 | dotenv (2.0.2) 44 | elasticsearch (1.0.12) 45 | elasticsearch-api (= 1.0.12) 46 | elasticsearch-transport (= 1.0.12) 47 | elasticsearch-api (1.0.12) 48 | multi_json 49 | elasticsearch-transport (1.0.12) 50 | faraday 51 | multi_json 52 | equalizer (0.0.11) 53 | erubis (2.7.0) 54 | excon (0.45.4) 55 | extlib (0.9.16) 56 | faraday (0.9.1) 57 | multipart-post (>= 1.2, < 3) 58 | google-api-client (0.8.2) 59 | activesupport (>= 3.2) 60 | addressable (~> 2.3) 61 | autoparse (~> 0.3) 62 | extlib (~> 0.9) 63 | faraday (~> 0.9) 64 | launchy (~> 2.4) 65 | multi_json (~> 1.10) 66 | retriable (~> 1.4) 67 | signet (~> 0.6) 68 | google_drive (1.0.1) 69 | google-api-client (>= 0.7.0) 70 | nokogiri (>= 1.4.4, != 1.5.2, != 1.5.1) 71 | oauth (>= 0.3.6) 72 | oauth2 (>= 0.5.0) 73 | hashie (3.4.2) 74 | http_router (0.11.2) 75 | rack (>= 1.0.0) 76 | url_mount (~> 0.2.1) 77 | i18n (0.7.0) 78 | ice_nine (0.11.1) 79 | jmespath (1.0.2) 80 | multi_json (~> 1.0) 81 | json (1.8.3) 82 | jwt (1.5.1) 83 | launchy (2.4.3) 84 | addressable (~> 2.3) 85 | liquid (3.0.3) 86 | liquify (0.2.7) 87 | liquid (>= 2.2.2) 88 | loofah (2.0.3) 89 | nokogiri (>= 1.5.9) 90 | mail (2.5.4) 91 | mime-types (~> 1.16) 92 | treetop (~> 1.4.8) 93 | method_source (0.8.2) 94 | mime-types (1.25.1) 
95 | mini_portile2 (2.1.0) 96 | minitest (5.8.0) 97 | moneta (0.7.20) 98 | multi_json (1.11.2) 99 | multi_xml (0.5.5) 100 | multipart-post (2.0.0) 101 | newrelic_rpm (3.14.2.312) 102 | nokogiri (1.6.8) 103 | mini_portile2 (~> 2.1.0) 104 | pkg-config (~> 1.1.7) 105 | oauth (0.4.7) 106 | oauth2 (1.0.0) 107 | faraday (>= 0.8, < 0.10) 108 | jwt (~> 1.0) 109 | multi_json (~> 1.3) 110 | multi_xml (~> 0.5) 111 | rack (~> 1.2) 112 | oj (2.12.13) 113 | padrino (0.12.5) 114 | padrino-admin (= 0.12.5) 115 | padrino-cache (= 0.12.5) 116 | padrino-core (= 0.12.5) 117 | padrino-gen (= 0.12.5) 118 | padrino-helpers (= 0.12.5) 119 | padrino-mailer (= 0.12.5) 120 | padrino-support (= 0.12.5) 121 | padrino-admin (0.12.5) 122 | padrino-core (= 0.12.5) 123 | padrino-helpers (= 0.12.5) 124 | padrino-cache (0.12.5) 125 | moneta (~> 0.7.0) 126 | padrino-core (= 0.12.5) 127 | padrino-helpers (= 0.12.5) 128 | padrino-core (0.12.5) 129 | activesupport (>= 3.1) 130 | http_router (~> 0.11.0) 131 | padrino-support (= 0.12.5) 132 | rack (< 1.6.0) 133 | rack-protection (>= 1.5.0) 134 | sinatra (~> 1.4.2) 135 | thor (~> 0.18) 136 | padrino-gen (0.12.5) 137 | bundler (~> 1.0) 138 | padrino-core (= 0.12.5) 139 | padrino-helpers (0.12.5) 140 | i18n (~> 0.6, >= 0.6.7) 141 | padrino-support (= 0.12.5) 142 | tilt (~> 1.4.1) 143 | padrino-mailer (0.12.5) 144 | mail (~> 2.5.3) 145 | padrino-core (= 0.12.5) 146 | padrino-support (0.12.5) 147 | activesupport (>= 3.1) 148 | parallel (1.6.1) 149 | parslet (1.7.1) 150 | blankslate (>= 2.0, <= 4.0) 151 | pkg-config (1.1.7) 152 | polyglot (0.3.5) 153 | pry (0.10.1) 154 | coderay (~> 1.1.0) 155 | method_source (~> 0.8.1) 156 | slop (~> 3.4) 157 | pry-byebug (3.2.0) 158 | byebug (~> 5.0) 159 | pry (~> 0.10) 160 | puma (2.15.3) 161 | rack (1.5.5) 162 | rack-protection (1.5.3) 163 | rack 164 | rack-test (0.6.3) 165 | rack (>= 1.0) 166 | rails-deprecated_sanitizer (1.0.3) 167 | activesupport (>= 4.2.0.alpha) 168 | rails-dom-testing (1.0.6) 169 | activesupport (>= 
4.2.0.beta, < 5.0) 170 | nokogiri (~> 1.6.0) 171 | rails-deprecated_sanitizer (>= 1.0.1) 172 | rails-html-sanitizer (1.0.3) 173 | loofah (~> 2.0) 174 | rake (10.4.2) 175 | retriable (1.4.1) 176 | rspec (3.3.0) 177 | rspec-core (~> 3.3.0) 178 | rspec-expectations (~> 3.3.0) 179 | rspec-mocks (~> 3.3.0) 180 | rspec-core (3.3.2) 181 | rspec-support (~> 3.3.0) 182 | rspec-expectations (3.3.1) 183 | diff-lcs (>= 1.2.0, < 2.0) 184 | rspec-support (~> 3.3.0) 185 | rspec-mocks (3.3.2) 186 | diff-lcs (>= 1.2.0, < 2.0) 187 | rspec-support (~> 3.3.0) 188 | rspec-support (3.3.0) 189 | ruby-prof (0.15.9) 190 | safe_yaml (1.0.4) 191 | sass (3.4.16) 192 | signet (0.6.1) 193 | addressable (~> 2.3) 194 | extlib (~> 0.9) 195 | faraday (~> 0.9) 196 | jwt (~> 1.5) 197 | multi_json (~> 1.10) 198 | sinatra (1.4.6) 199 | rack (~> 1.4) 200 | rack-protection (~> 1.4) 201 | tilt (>= 1.3, < 3) 202 | slop (3.6.0) 203 | stretchy (0.4.7) 204 | elasticsearch (~> 1.0) 205 | excon (~> 0.45) 206 | valid (~> 0.5) 207 | virtus (~> 1.0) 208 | thor (0.19.1) 209 | thread_safe (0.3.5) 210 | tilt (1.4.1) 211 | treetop (1.4.15) 212 | polyglot 213 | polyglot (>= 0.3.1) 214 | tzinfo (1.2.2) 215 | thread_safe (~> 0.1) 216 | url_mount (0.2.1) 217 | rack 218 | valid (0.5.0) 219 | virtus (1.0.5) 220 | axiom-types (~> 0.1) 221 | coercible (~> 1.0) 222 | descendants_tracker (~> 0.0, >= 0.0.3) 223 | equalizer (~> 0.0, >= 0.0.9) 224 | 225 | PLATFORMS 226 | ruby 227 | 228 | DEPENDENCIES 229 | actionview 230 | aws-sdk (~> 2) 231 | cf-app-utils 232 | dotenv 233 | elasticsearch 234 | erubis 235 | google_drive 236 | hashie 237 | liquid (= 3.0.3) 238 | liquify 239 | newrelic_rpm 240 | oj 241 | padrino (= 0.12.5) 242 | parallel 243 | parslet 244 | pry 245 | pry-byebug 246 | puma 247 | rack-test 248 | rake 249 | rspec 250 | rspec-mocks 251 | ruby-prof 252 | safe_yaml 253 | sass 254 | stretchy 255 | 256 | BUNDLED WITH 257 | 1.11.2 258 | --------------------------------------------------------------------------------