├── datafile ├── test │ ├── datafile │ │ ├── world.rb │ │ ├── stadiums.rb │ │ └── eurocup.rb │ ├── helper.rb │ ├── test_builder.rb │ ├── test_file_dataset_registry.rb │ ├── test_file_worker.rb │ └── test_football_dataset.rb ├── CHANGELOG.md ├── .gitignore ├── Manifest.txt ├── attic │ ├── lib │ │ ├── workers │ │ │ ├── datafile.rb │ │ │ ├── file │ │ │ │ └── dataset.rb │ │ │ ├── beer.rb │ │ │ ├── football.rb │ │ │ └── world.rb │ │ ├── file_worker.rb │ │ ├── zip_worker.rb │ │ ├── builder.rb │ │ ├── dataset.rb │ │ ├── builder2.rb │ │ └── datafile.rb │ └── test │ │ ├── test_builder2.rb │ │ └── datafile2 │ │ └── at.rb ├── lib │ ├── datafile │ │ ├── version.rb │ │ ├── workers │ │ │ ├── file │ │ │ │ ├── worker.rb │ │ │ │ ├── dataset.rb │ │ │ │ └── registry.rb │ │ │ └── zip │ │ │ │ ├── worker.rb │ │ │ │ └── dataset.rb │ │ ├── builder.rb │ │ ├── datafile.rb │ │ └── dataset.rb │ └── datafile.rb ├── README.md ├── Rakefile ├── data │ └── football.txt └── NOTES.md ├── dataman ├── .gitignore ├── HISTORY.md ├── Manifest.txt ├── test │ ├── helper.rb │ └── test_sqlite.rb ├── lib │ ├── dataman.rb │ └── dataman │ │ ├── version.rb │ │ └── dataman.rb ├── Rakefile └── README.md ├── README.md └── LICENSE.md /datafile/test/datafile/world.rb: -------------------------------------------------------------------------------- 1 | 2 | world 'world.db', setup: 'countries' 3 | 4 | -------------------------------------------------------------------------------- /dataman/.gitignore: -------------------------------------------------------------------------------- 1 | # ignore ruby rake generated folders 2 | 3 | pkg/ 4 | doc/ 5 | 6 | 7 | -------------------------------------------------------------------------------- /dataman/HISTORY.md: -------------------------------------------------------------------------------- 1 | ### 0.0.1 / 2015-04-08 2 | 3 | * Everything is new. 
First release 4 | -------------------------------------------------------------------------------- /datafile/test/datafile/stadiums.rb: -------------------------------------------------------------------------------- 1 | 2 | football 'stadiums' ## NOTE: default is setup: 'all' 3 | 4 | -------------------------------------------------------------------------------- /datafile/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ### 0.3.2 2 | 3 | ### 0.0.1 / 2014-11-27 4 | 5 | * Everything is new. First release. 6 | 7 | -------------------------------------------------------------------------------- /datafile/test/datafile/eurocup.rb: -------------------------------------------------------------------------------- 1 | ##################### 2 | # national teams 3 | 4 | football 'national-teams' 5 | football 'euro-cup' 6 | 7 | -------------------------------------------------------------------------------- /dataman/Manifest.txt: -------------------------------------------------------------------------------- 1 | HISTORY.md 2 | Manifest.txt 3 | README.md 4 | Rakefile 5 | lib/dataman.rb 6 | lib/dataman/dataman.rb 7 | lib/dataman/version.rb 8 | test/helper.rb 9 | test/test_sqlite.rb 10 | -------------------------------------------------------------------------------- /datafile/.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | coverage 6 | InstalledFiles 7 | lib/bundler/man 8 | pkg 9 | rdoc 10 | spec/reports 11 | test/tmp 12 | test/version_tmp 13 | tmp 14 | 15 | # YARD artifacts 16 | .yardoc 17 | _yardoc 18 | doc/ 19 | -------------------------------------------------------------------------------- /dataman/test/helper.rb: -------------------------------------------------------------------------------- 1 | 2 | ## minitest setup 3 | require 'minitest/autorun' 4 | 5 | ## deps 6 | require 'sportdb/models' # note: will include worlddb (and 
activerecord machinery) 7 | require 'datafile' 8 | 9 | ## our own code 10 | require 'dataman' 11 | 12 | -------------------------------------------------------------------------------- /dataman/lib/dataman.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | 4 | # our own code 5 | 6 | require 'dataman/version' ## let version always go first 7 | require 'dataman/dataman' 8 | 9 | 10 | 11 | 12 | # say hello 13 | puts Dataman.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datasets - tools, libraries & scripts 2 | 3 | Gems: 4 | 5 | - [**datafile**](datafile) - builder for downloading 'n' reading datasets 6 | - [dataman](dataman) - dataset manager using datafiles 7 | 8 | 9 | 10 | ## License 11 | 12 | The scripts are dedicated to the public domain. 13 | Use them as you please with no restrictions whatsoever. 
14 | -------------------------------------------------------------------------------- /datafile/test/helper.rb: -------------------------------------------------------------------------------- 1 | 2 | ## minitest setup 3 | require 'minitest/autorun' 4 | 5 | ## deps 6 | 7 | ### require 'worlddb' 8 | require 'sportdb/models' # note: will include worlddb 9 | require 'sportdb/readers' ## pulls in SportDb::ZipPackage or SportDb::DirPackage 10 | 11 | 12 | 13 | $LOAD_PATH.unshift( './lib' ) 14 | 15 | ## our own code 16 | require 'datafile' 17 | -------------------------------------------------------------------------------- /datafile/Manifest.txt: -------------------------------------------------------------------------------- 1 | CHANGELOG.md 2 | Manifest.txt 3 | README.md 4 | Rakefile 5 | data/football.txt 6 | lib/datafile.rb 7 | lib/datafile/builder.rb 8 | lib/datafile/datafile.rb 9 | lib/datafile/dataset.rb 10 | lib/datafile/version.rb 11 | lib/datafile/workers/file/dataset.rb 12 | lib/datafile/workers/file/registry.rb 13 | lib/datafile/workers/file/worker.rb 14 | lib/datafile/workers/zip/dataset.rb 15 | lib/datafile/workers/zip/worker.rb 16 | -------------------------------------------------------------------------------- /datafile/attic/lib/workers/datafile.rb: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | class DatasetNode ### find a better name (e.g. DatasetWorker, DatasetBase, DatasetRef, DatasetWrapper ???) ??? 
5 | 6 | 7 | def initialize( dataset ) 8 | @dataset = dataset 9 | end 10 | 11 | def name() @dataset.name; end ## delegate attributes to "core" dataset struct 12 | def opts() @dataset.opts; end 13 | def setup() @dataset.setup; end 14 | def format() @dataset.format; end 15 | 16 | end # class DatasetNode 17 | 18 | -------------------------------------------------------------------------------- /dataman/test/test_sqlite.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | ### 4 | # to run use 5 | # ruby -I ./lib -I ./test test/test_sqlite.rb 6 | 7 | 8 | require 'helper' 9 | 10 | class TestSqlite < MiniTest::Test 11 | 12 | def test_connect 13 | m = Dataman::Dataman.new( adapter: 'sqlite3', database: ':memory:' ) 14 | m.connect 15 | 16 | assert true # if we get here - test success 17 | end 18 | 19 | def test_clean 20 | ## to be done 21 | assert true # if we get here - test success 22 | end 23 | 24 | end # class TestSqlite 25 | -------------------------------------------------------------------------------- /dataman/lib/dataman/version.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Dataman 4 | 5 | MAJOR = 0 ## todo: namespace inside version or something - why? why not?? 
6 | MINOR = 1 7 | PATCH = 0 8 | VERSION = [MAJOR,MINOR,PATCH].join('.') 9 | 10 | def self.version 11 | VERSION 12 | end 13 | 14 | def self.banner 15 | "dataman/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]" 16 | end 17 | 18 | def self.root 19 | "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}" 20 | end 21 | 22 | end # module Dataman 23 | 24 | -------------------------------------------------------------------------------- /datafile/attic/lib/file_worker.rb: -------------------------------------------------------------------------------- 1 | 2 | class FileWorker 3 | 4 | def read 5 | ## note: also run inlines (setup script) before 6 | @datafile.inlines.each do |inline| 7 | inline.call 8 | end 9 | end 10 | 11 | def calc 12 | @datafile.scripts.each do |script| 13 | script.call 14 | end 15 | end 16 | 17 | def dump 18 | ## also dump inlines 19 | @datafile.inlines.each do |inline| 20 | inline.dump 21 | end 22 | 23 | ## also dump scripts 24 | @datafile.scripts.each do |script| 25 | script.dump 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /datafile/attic/lib/zip_worker.rb: -------------------------------------------------------------------------------- 1 | class ZipWorker 2 | 3 | 4 | def read 5 | ## note: also run inlines (setup script) before 6 | @datafile.inlines.each do |inline| 7 | inline.call 8 | end 9 | end 10 | 11 | def calc 12 | @datafile.scripts.each do |script| 13 | script.call 14 | end 15 | end 16 | 17 | def dump 18 | ## also dump inlines 19 | @datafile.inlines.each do |inline| 20 | inline.dump 21 | end 22 | ## also dump scripts 23 | @datafile.scripts.each do |script| 24 | script.dump 25 | end 26 | end 27 | 28 | 29 | end # class ZipWorker -------------------------------------------------------------------------------- /datafile/lib/datafile/version.rb: -------------------------------------------------------------------------------- 1 | 2 | 
module Datafile 3 | 4 | MAJOR = 0 ## todo: namespace inside version or something - why? why not?? 5 | MINOR = 3 6 | PATCH = 2 7 | VERSION = [MAJOR,MINOR,PATCH].join('.') 8 | 9 | def self.version 10 | VERSION 11 | end 12 | 13 | def self.banner 14 | "datafile/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})" 15 | end 16 | 17 | def self.root 18 | File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) ) 19 | end 20 | 21 | def self.data_path 22 | "#{root}/data" 23 | end 24 | 25 | end # module Datafile 26 | -------------------------------------------------------------------------------- /datafile/README.md: -------------------------------------------------------------------------------- 1 | # datafile 2 | 3 | datafile gem - builder for downloading 'n' reading datasets 4 | 5 | * home :: [github.com/rubycocos/datasets](https://github.com/rubycocos/datasets) 6 | * bugs :: [github.com/rubycocos/datasets/issues](https://github.com/rubycocos/datasets/issues) 7 | * gem :: [rubygems.org/gems/datafile](https://rubygems.org/gems/datafile) 8 | * rdoc :: [rubydoc.info/gems/datafile](https://rubydoc.info/gems/datafile) 9 | 10 | 11 | ## Usage 12 | 13 | To be done 14 | 15 | 16 | ## License 17 | 18 | The `datafile` scripts are dedicated to the public domain. 19 | Use them as you please with no restrictions whatsoever. 
20 | 21 | -------------------------------------------------------------------------------- /dataman/Rakefile: -------------------------------------------------------------------------------- 1 | require 'hoe' 2 | require './lib/dataman/version.rb' 3 | 4 | Hoe.spec 'dataman' do 5 | 6 | self.version = Dataman::VERSION 7 | 8 | self.summary = 'dataman - dataset manager using datafiles' 9 | self.description = summary 10 | 11 | self.urls = ['https://github.com/textkit/dataman'] 12 | 13 | self.author = 'Gerald Bauer' 14 | self.email = 'ruby-talk@ruby-lang.org' 15 | 16 | # switch extension to .markdown for gihub formatting 17 | self.readme_file = 'README.md' 18 | self.history_file = 'HISTORY.md' 19 | 20 | self.extra_deps = [ 21 | ] 22 | 23 | self.licenses = ['Public Domain'] 24 | 25 | self.spec_extras = { 26 | required_ruby_version: '>= 1.9.2' 27 | } 28 | 29 | end 30 | -------------------------------------------------------------------------------- /datafile/lib/datafile/workers/file/worker.rb: -------------------------------------------------------------------------------- 1 | 2 | 3 | module Datafile 4 | 5 | class FileWorker ## check: rename to FileDatafileWorker?? or FileDatafile -why, why not ?? 
6 | 7 | include LogUtils::Logging 8 | 9 | def initialize( datafile ) 10 | @datafile = datafile 11 | end 12 | 13 | def download 14 | ## note: do NOTHING for now; assume repo already present (unpacked) on local filesystem 15 | end 16 | 17 | def read 18 | @datafile.datasets.each do |dataset| 19 | f = FileDataset.new( dataset ) 20 | f.read 21 | end 22 | end 23 | 24 | def dump 25 | @datafile.datasets.each do |dataset| 26 | f = FileDataset.new( dataset ) 27 | f.dump 28 | end 29 | end 30 | 31 | end # class FileWorker 32 | end # module Datafile 33 | -------------------------------------------------------------------------------- /dataman/README.md: -------------------------------------------------------------------------------- 1 | # dataman 2 | 3 | dataset manager using datafiles 4 | 5 | * home :: [github.com/textkit/dataman](https://github.com/textkit/dataman) 6 | * bugs :: [github.com/textkit/dataman/issues](https://github.com/textkit/dataman/issues) 7 | * gem :: [rubygems.org/gems/dataman](https://rubygems.org/gems/dataman) 8 | * rdoc :: [rubydoc.info/gems/dataman](https://rubydoc.info/gems/dataman) 9 | * forum :: [ruby-talk@ruby-lang.org](https://www.ruby-lang.org/en/community/mailing-lists/) 10 | 11 | 12 | ## Usage 13 | 14 | To be done 15 | 16 | 17 | ## Install 18 | 19 | Just install the gem: 20 | 21 | $ gem install dataman 22 | 23 | 24 | ## License 25 | 26 | The `dataman` scripts are dedicated to the public domain. 27 | Use them as you please with no restrictions whatsoever. 
28 | 29 | -------------------------------------------------------------------------------- /datafile/Rakefile: -------------------------------------------------------------------------------- 1 | require 'hoe' 2 | require './lib/datafile/version.rb' 3 | 4 | Hoe.spec 'datafile' do 5 | 6 | self.version = Datafile::VERSION 7 | 8 | self.summary = 'datafile - builder for downloading n reading datasets' 9 | self.description = summary 10 | 11 | self.urls = { home: 'https://github.com/rubycocos/datasets' } 12 | 13 | self.author = 'Gerald Bauer' 14 | self.email = 'gerald.bauer@gmail.com' 15 | 16 | # switch extension to .markdown for gihub formatting 17 | self.readme_file = 'README.md' 18 | self.history_file = 'CHANGELOG.md' 19 | 20 | self.extra_deps = [ 21 | ['logutils'], 22 | ['fetcher'], 23 | ] 24 | 25 | self.licenses = ['Public Domain'] 26 | 27 | self.spec_extras = { 28 | required_ruby_version: '>= 2.2.2' 29 | } 30 | end 31 | -------------------------------------------------------------------------------- /datafile/lib/datafile/workers/zip/worker.rb: -------------------------------------------------------------------------------- 1 | 2 | 3 | module Datafile 4 | 5 | class ZipWorker ## check: rename to ZipDatafileWorker?? or ZipDatafile -why, why not ?? 
6 | 7 | include LogUtils::Logging 8 | 9 | def initialize( datafile ) 10 | @datafile = datafile 11 | end 12 | 13 | def download 14 | @datafile.datasets.each do |dataset| 15 | z = ZipDataset.new( dataset ) 16 | z.download 17 | end 18 | end 19 | 20 | def read 21 | @datafile.datasets.each do |dataset| 22 | z = ZipDataset.new( dataset ) 23 | z.read 24 | end 25 | end 26 | 27 | def dump 28 | @datafile.datasets.each do |dataset| 29 | z = ZipDataset.new( dataset ) 30 | z.dump 31 | end 32 | end 33 | 34 | end # class ZipWorker 35 | 36 | end # module Datafile 37 | -------------------------------------------------------------------------------- /datafile/attic/lib/workers/file/dataset.rb: -------------------------------------------------------------------------------- 1 | 2 | def repo_dir ### check: use (rename to) include dir (or local_repo_dir) - why, why not ??? 3 | ## note: for easy testing allow "in situ" datasets 4 | ## e.g. ./ (e.g. mu-mauritius) is openfootball/mu-mauritius 5 | ## split name in org/user + project (e.g. openfootball/at-austria) 6 | parts = @dataset.name.split( '/' ) 7 | 8 | basename = parts[1] 9 | if File.basename( Dir.getwd ) == basename 10 | puts " bingo!! 
working folder >#{basename}< matches dataset" 11 | return Dir.getwd ## assume working directory/folder is repo dir 12 | end 13 | 14 | registry.lookup( @dataset.name ) 15 | end 16 | 17 | 18 | 19 | private 20 | def registry() self.class.registry; end ## convenience method to access "static" shared class variable 21 | -------------------------------------------------------------------------------- /datafile/attic/lib/workers/beer.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Datafile 4 | 5 | class BeerFileDataset < FileDataset 6 | 7 | def initialize( dataset ) 8 | super( dataset ) 9 | end 10 | 11 | def read() 12 | logger.info( "read beer-dataset (file) '#{name}', '#{setup}'" ) 13 | 14 | ## BeerDb.read_setup( setup, repo_dir ) 15 | puts "FIX/TODO - read beer dataset -- to be (re)done, sorry!!!" 16 | end 17 | end # class BeerFileDataset 18 | 19 | 20 | class BeerZipDataset < ZipDataset 21 | 22 | def initialize( dataset ) 23 | super( dataset ) 24 | end 25 | 26 | def read 27 | logger.info( "read beer-dataset (zip) '#{name}', '#{setup}'" ) 28 | 29 | ## BeerDb.read_setup_from_zip( local_zip_name, setup, local_zip_root ) 30 | end 31 | end # class BeerZipDataset 32 | 33 | end # module Datafile 34 | -------------------------------------------------------------------------------- /datafile/attic/lib/builder.rb: -------------------------------------------------------------------------------- 1 | # attic note: remove inline/script block for now (keep it simple) - why? why not? 2 | 3 | 4 | ## add models shortcuts/constants if exist 5 | ## todo/check: is there a better way ? 6 | ## e.g. just include in inline section/block - for example? 7 | ## check if already included? and check on load to include? 
8 | ## models might not yet be required 9 | 10 | if defined?( WorldDb ) && defined?( WorldDb::Models ) 11 | include WorldDb::Models 12 | else 13 | puts "*** sorry; can't include WorldDb::Models; not yet defined (required)" 14 | end 15 | ## todo/check: also include FootballDb::Models and BeerDb::Models ??? 16 | 17 | 18 | ## "special" datasets 19 | 20 | def inline( &block ) 21 | logger.info( "[builder] add inline script-block" ) 22 | @datafile.inlines << Inline.new( block ) 23 | end 24 | -------------------------------------------------------------------------------- /datafile/attic/lib/dataset.rb: -------------------------------------------------------------------------------- 1 | 2 | def file? # note: use file? (not exit? might use zip? later to check if zip exists? -why? why not?) 3 | ## hack/convenience shortcut: 4 | ## 1) check if dataset exists as local (in situ) file dataset 5 | ## e.g. working folder == name of dataset 6 | ## 7 | ## todo/fix: 2) also check 8 | ## via (file)registry - why, why not?? 9 | 10 | ## split name in org/user + project (e.g. openfootball/at-austria) 11 | parts = @name.split( '/' ) 12 | 13 | basename = parts[1] 14 | ## e.g. 15 | ## ./ (working folder) => at-austria 16 | ## openfootball/at-austria 17 | if File.basename( Dir.getwd ) == basename 18 | puts " bingo!! working folder >#{basename}< matches dataset" 19 | true ## return true 20 | else 21 | false ## return false 22 | end 23 | end ## file? 
24 | -------------------------------------------------------------------------------- /datafile/attic/lib/workers/football.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Datafile 4 | 5 | class FootballFileDataset < FileDataset 6 | 7 | def initialize( dataset ) 8 | super( dataset ) 9 | end 10 | 11 | def read 12 | logger.info( "read football-dataset (file) '#{name}', '#{setup}'" ) 13 | 14 | pack = SportDb::Package.new( repo_dir ) 15 | pack.read( season: setup ) ## note: pass on (optional) setup arg as season (filter) arg for now 16 | end 17 | end # class FootballFileDataset 18 | 19 | 20 | class FootballZipDataset < ZipDataset 21 | 22 | def initialize( dataset ) 23 | super( dataset ) 24 | end 25 | 26 | def read 27 | logger.info( "read football-dataset (zip) '#{name}', '#{setup}'" ) 28 | 29 | ## SportDb.read_setup_from_zip( local_zip_name, setup, local_zip_root ) 30 | end 31 | end # class FootballZipDataset 32 | 33 | end # module Datafile 34 | -------------------------------------------------------------------------------- /datafile/attic/lib/workers/world.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Datafile 4 | 5 | class WorldFileDataset < FileDataset 6 | 7 | def initialize( dataset ) 8 | super( dataset ) 9 | end 10 | 11 | def read 12 | logger.info( "read world-dataset (file) '#{name}', '#{setup}'" ) 13 | 14 | ## WorldDb.read_setup( 'setups/countries', WORLD_DB_INCLUDE_PATH, skip_tags: true ) 15 | ## WorldDb.read_setup( setup, repo_dir, skip_tags: true ) 16 | puts "FIX/TODO - read world dataset -- to be (re)done, sorry!!!" 
17 | end 18 | end # class WorldFileDataset 19 | 20 | 21 | class WorldZipDataset < ZipDataset 22 | 23 | def initialize( dataset ) 24 | super( dataset ) 25 | end 26 | 27 | def read 28 | logger.info( "read world-dataset (zip) '#{name}', '#{setup}'" ) 29 | 30 | ## WorldDb.read_setup_from_zip( local_zip_name, setup, local_zip_root, { skip_tags: true } ) 31 | end 32 | end # class WorldZipDataset 33 | 34 | end # module Datafile 35 | -------------------------------------------------------------------------------- /datafile/test/test_builder.rb: -------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby test/test_builder.rb 4 | 5 | 6 | require_relative 'helper' 7 | 8 | class TestBuilder < Minitest::Test 9 | 10 | def test_builder 11 | code =< #{datafile.deps.inspect}:" 28 | puts " #{datafile.datasets.size} datasets, #{datafile.scripts.size} scripts" 29 | datafile.dump 30 | datafile.calc ## try calc (call dummy calculations for testing) 31 | end 32 | 33 | assert true # if we get here - test success 34 | end 35 | 36 | end # class TestBuilder2 37 | -------------------------------------------------------------------------------- /datafile/lib/datafile/builder.rb: -------------------------------------------------------------------------------- 1 | 2 | module Datafile 3 | 4 | class Builder ## "simple" builder (one file, one datafile) 5 | 6 | def self.load_file( path ) 7 | code = File.open( path, 'r:utf-8' ) {|f| f.read } 8 | self.load( code ) 9 | end 10 | 11 | def self.load( code ) 12 | builder = Builder.new 13 | builder.instance_eval( code ) 14 | builder 15 | end 16 | 17 | 18 | include LogUtils::Logging 19 | 20 | def initialize 21 | @datafile = Datafile.new 22 | end 23 | 24 | attr_reader :datafile 25 | 26 | 27 | ## "classic" standard datasets 28 | def beer( name, opts={} ) 29 | logger.info( "[builder] add beer-dataset '#{name}'" ) 30 | @datafile.datasets << BeerDataset.new( name, opts ) 31 | end 32 | 33 | def football( 
name, opts={} ) 34 | logger.info( "[builder] add football-dataset '#{name}'" ) 35 | @datafile.datasets << FootballDataset.new( name, opts ) 36 | end 37 | 38 | def world( name, opts={} ) 39 | logger.info( "[builder] add world-dataset '#{name}'" ) 40 | @datafile.datasets << WorldDataset.new( name, opts ) 41 | end 42 | 43 | end # class Builder 44 | end # module Datafile 45 | -------------------------------------------------------------------------------- /datafile/data/football.txt: -------------------------------------------------------------------------------- 1 | ########################################################## 2 | # known football.db (openfootball) datasets (repos) 3 | 4 | ##################### 5 | # -- Basics 6 | 7 | openfootball/stadiums 8 | openfootball/players 9 | openfootball/assocs 10 | 11 | ########################## 12 | # -- National Teams 13 | 14 | openfootball/national-teams 15 | 16 | openfootball/world-cup 17 | openfootball/euro-cup 18 | openfootball/africa-cup 19 | openfootball/north-america-gold-cup 20 | openfootball/copa-america 21 | openfootball/confed-cup 22 | 23 | ######################### 24 | # -- Clubs 25 | 26 | openfootball/clubs 27 | openfootball/world 28 | 29 | openfootball/england 30 | openfootball/austria 31 | openfootball/deutschland 32 | openfootball/espana 33 | openfootball/italy 34 | openfootball/france 35 | openfootball/europe-champions-league 36 | 37 | openfootball/mexico 38 | openfootball/north-america-champions-league 39 | 40 | openfootball/brazil 41 | openfootball/copa-sudamericana 42 | openfootball/copa-libertadores 43 | 44 | openfootball/club-world-cup 45 | 46 | 47 | 48 | ############ 49 | # in csv format 50 | 51 | footballcsv/england 52 | footballcsv/deutschland 53 | 54 | ## todo/fix: add some more ... 
55 | -------------------------------------------------------------------------------- /datafile/lib/datafile.rb: -------------------------------------------------------------------------------- 1 | require 'logutils' 2 | require 'fetcher' ## add (HTTP/S) download support (of blobs/binaries/zips) 3 | 4 | 5 | # our own code 6 | require_relative 'datafile/version' # let it always go first 7 | 8 | 9 | 10 | module Datafile 11 | 12 | class Error < StandardError 13 | end 14 | 15 | #### 16 | # todo/check: 17 | # rename to DatasetNotFound or similar?? 18 | # use "common" error class - why? why not? 19 | class DatasetNotFoundError < Error 20 | attr_reader :message 21 | 22 | def initialize( message ) 23 | @message = message 24 | end 25 | 26 | def to_s 27 | "datset not found => #{@message}" 28 | end 29 | end 30 | end # module Datafile 31 | 32 | 33 | require_relative 'datafile/dataset' 34 | require_relative 'datafile/builder' 35 | 36 | 37 | require_relative 'datafile/workers/file/registry' 38 | require_relative 'datafile/workers/file/dataset' 39 | require_relative 'datafile/workers/file/worker' 40 | 41 | require_relative 'datafile/workers/zip/dataset' 42 | require_relative 'datafile/workers/zip/worker' 43 | 44 | require_relative 'datafile/datafile' 45 | 46 | 47 | # say hello 48 | puts Datafile.banner ## if defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG 49 | -------------------------------------------------------------------------------- /datafile/lib/datafile/workers/file/dataset.rb: -------------------------------------------------------------------------------- 1 | 2 | module Datafile 3 | 4 | class FileDataset 5 | ## read dataset from file(system) 6 | 7 | include LogUtils::Logging 8 | 9 | 10 | def self.registry 11 | @@registry ||= FileDatasetRegistry.new 12 | @@registry 13 | end 14 | 15 | def initialize( dataset ) 16 | @dataset = dataset 17 | end 18 | 19 | def repo_dir ### check: use (rename to) include dir (or local_repo_dir) - why, why not ??? 
20 | self.class.registry.lookup( @dataset.name ) 21 | end 22 | 23 | def dump 24 | ## for debuggin dump dataset -- todo (also check if folder exits ??) 25 | puts "dataset '#{@dataset.name}' opts=#{@dataset.opts.inspect}" ## use opts.inspect instead of to_json - why? why not? 26 | puts " repo-dir '#{repo_dir}'" 27 | end 28 | 29 | def read 30 | if @dataset.is_a?( FootballDataset ) 31 | logger.info( "read football dataset (file) '#{@dataset.name}', '#{@dataset.setup}'" ) 32 | 33 | pack = SportDb::DirPackage.new( repo_dir ) 34 | pack.read( season: @dataset.setup ) ## note: pass on (optional) setup arg as season (filter) arg for now 35 | else 36 | logger.info( "TODO/FIX: read dataset (file) '#{@dataset.name}', '#{@dataset.setup}'; sorry" ) 37 | end 38 | end 39 | end # class FileDataset 40 | end # module Datafile 41 | -------------------------------------------------------------------------------- /dataman/lib/dataman/dataman.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Dataman 4 | 5 | class Dataman 6 | 7 | def initialize( config ) 8 | @config = config # db config connection spec hash 9 | end 10 | 11 | 12 | def clean 13 | db_adapter = @config[ 'adapter' ] 14 | 15 | ### for sqlite3 delete/remove single-file database 16 | if db_adapter == 'sqlite3' 17 | db_database = @config[ 'database' ] 18 | 19 | ## check: 20 | ## if in memory e.g. ':memory:' no delete/clean needed!!! 21 | 22 | FileUtils.rm( db_database ) if File.exists?( db_database ) 23 | else 24 | puts "[Dataman] clean: do nothing; no clean steps configured for db adapter >#{db_adapter}<" 25 | end 26 | end 27 | 28 | 29 | def connect 30 | pp @config 31 | ActiveRecord::Base.establish_connection( @config ) 32 | 33 | db_adapter = @config[ 'adapter' ] 34 | 35 | ## try to speed up sqlite 36 | ## see http://www.sqlite.org/pragma.html 37 | if db_adapter == 'sqlite3' 38 | c = ActiveRecord::Base.connection 39 | 40 | ## check: 41 | ## if in memory e.g. 
':memory:' no pragma needed!! 42 | 43 | c.execute( 'PRAGMA synchronous=OFF;' ) 44 | c.execute( 'PRAGMA journal_mode=OFF;' ) 45 | c.execute( 'PRAGMA temp_store=MEMORY;' ) 46 | end 47 | end 48 | 49 | 50 | 51 | end # class Dataman 52 | 53 | end # module Dataman 54 | 55 | -------------------------------------------------------------------------------- /datafile/attic/test/datafile2/at.rb: -------------------------------------------------------------------------------- 1 | 2 | puts "[eval] self in top = #{self.class.name}" 3 | 4 | task :at => :importbuiltin do 5 | puts "[eval] self in data (enter) = #{self.class.name}" 6 | football 'at-austria' 7 | puts "[eval] self in data (leave) = #{self.class.name}" 8 | end 9 | 10 | task :at_2014_15 => :importbuiltin do 11 | football 'at-austria', setup: '2014-15' 12 | end 13 | 14 | 15 | task :at_recalc => :at do 16 | calc do 17 | [['at.2012/13'], 18 | ['at.2013/14'], 19 | ['at.2014/15', 'at.2.2014/15']].each do |event_key| 20 | recalc_standings( event_key, out_root: './build/at-autria' ) 21 | end 22 | end 23 | end 24 | 25 | task :at_2014_15_recalc => :at_2014_15 do 26 | calc do 27 | recalc_standings( ['at.2014/15', 'at.2.2014/15'], out_root: './build/at-autria' ) 28 | end 29 | end 30 | 31 | 32 | task :test_at_recalc => :env do 33 | calc do 34 | recalc_standings( ['at.2014/15', 'at.2.2014/15'], out_root: './build/at-autria' ) 35 | ## debug verison - write to ./build/at-austria 36 | ## recalc_standings( ['at.2014/15', 'at.2.2014/15'], out_root: './build/at-austria' ) 37 | end 38 | end 39 | 40 | 41 | ## check if method def works too 42 | 43 | puts "[eval] another self in top = #{self.class.name}" 44 | 45 | 46 | def test_hello() 47 | puts "[eval] self in method test_hello = #{self.class.name}" 48 | puts "hello from test_hello" 49 | end 50 | 51 | test_hello() 52 | -------------------------------------------------------------------------------- /datafile/test/test_football_dataset.rb: 
-------------------------------------------------------------------------------- 1 | ### 2 | # to run use 3 | # ruby test/test_football_dataset.rb 4 | 5 | 6 | require_relative 'helper' 7 | 8 | class TestFootballDataset < Minitest::Test 9 | 10 | def test_names 11 | pp FootballDataset.known_datasets 12 | 13 | at = FootballDataset.new( 'austria' ) 14 | at2 = FootballDataset.new( 'openfootball/austria' ) 15 | xxx = FootballDataset.new( 'xxx' ) 16 | xxx2 = FootballDataset.new( 'openfootball/xxx' ) 17 | 18 | assert_equal 'openfootball/austria', at.name 19 | assert_equal 'openfootball/austria', at2.name 20 | 21 | assert_equal 'openfootball/xxx', xxx.name 22 | assert_equal 'openfootball/xxx', xxx2.name 23 | 24 | ## check csv format 25 | at = FootballDataset.new( 'austria', format: 'csv' ) 26 | at2 = FootballDataset.new( 'footballcsv/austria' ) 27 | 28 | assert_equal 'footballcsv/austria', at.name 29 | assert_equal 'footballcsv/austria', at2.name 30 | end 31 | 32 | ## move to test datasets - why, why not ?? 
33 | def test_world 34 | at = WorldDataset.new( 'austria.db' ) 35 | at2 = WorldDataset.new( 'openmundi/austria.db' ) 36 | xxx = WorldDataset.new( 'xxx' ) 37 | xxx2 = WorldDataset.new( 'openmundi/xxx' ) 38 | 39 | assert_equal 'openmundi/austria.db', at.name 40 | assert_equal 'openmundi/austria.db', at2.name 41 | 42 | assert_equal 'openmundi/xxx', xxx.name 43 | assert_equal 'openmundi/xxx', xxx2.name 44 | end 45 | 46 | def test_beer 47 | at = BeerDataset.new( 'austria' ) 48 | at2 = BeerDataset.new( 'openbeer/austria' ) 49 | xxx = BeerDataset.new( 'xxx' ) 50 | xxx2 = BeerDataset.new( 'openbeer/xxx' ) 51 | 52 | assert_equal 'openbeer/austria', at.name 53 | assert_equal 'openbeer/austria', at2.name 54 | 55 | assert_equal 'openbeer/xxx', xxx.name 56 | assert_equal 'openbeer/xxx', xxx2.name 57 | end 58 | 59 | end # class TestFootballDataset 60 | -------------------------------------------------------------------------------- /datafile/lib/datafile/workers/file/registry.rb: -------------------------------------------------------------------------------- 1 | 2 | module Datafile 3 | 4 | class FileDatasetRegistry 5 | ## store mapping for (local) datasets e.g. 6 | ## map github handle e.g. openfootball/at-austria to local path 7 | 8 | include LogUtils::Logging 9 | 10 | def initialize 11 | ## org rootpaths 12 | @roots = {} 13 | ## org defaults (use merge to overwrite for now) 14 | @roots[:openmundi] = '../../openmundi' ## OPENMUNDI_ROOT = "../../openmundi" 15 | @roots[:openfootball] = '..' ## OPENFOOTBALL_ROOT = ".." 16 | @roots[:openbeer] = '..' 17 | 18 | @roots[:footballcsv] = '..' 19 | end 20 | 21 | def merge( hash ) 22 | ## todo: add support for merging project mappings too 23 | ## use merge_roots and merge_projects ?? why, why not?? 
24 | @roots = @roots.merge( hash ) 25 | end 26 | 27 | def lookup( name ) 28 | path, _ = lookup_path( name ) ## note: ignore error message passed along in return 29 | path 30 | end 31 | 32 | def lookup!( name ) 33 | path, error = lookup_path( name ) 34 | raise error if error 35 | path 36 | end 37 | 38 | 39 | private 40 | def lookup_path( name ) 41 | ## split name in org/user + project (e.g. openfootball/at-austria) 42 | parts = name.split( '/' ) 43 | ## check/todo: assert parts == 2 -- why, why not?? 44 | root = @roots[ parts[0].to_sym ] 45 | if root.nil? 46 | msg = "no mapping found for '#{parts[0]}' in '#{name}'" 47 | logger.error( msg ) 48 | return [nil, DatasetNotFoundError.new( msg )] ## throw exception FileNotFound / DatasetNotFound ?? 49 | end 50 | 51 | path = "#{root}/#{parts[1]}" 52 | ## check if folder/directory exists 53 | unless File.exist?( path ) 54 | msg = "no file found for '#{name}'; expected '#{path}'" 55 | logger.error( msg ) 56 | return [nil, DatasetNotFoundError.new( msg )] ## throw exception FileNotFound / DatasetNotFound ?? 57 | end 58 | 59 | ### check for File.directory?( path ) too - why, why not??? 
60 | [path, nil] ## use go-style returns with error as second argument (as error as value) 61 | end 62 | end # class FileDatasetRegistry 63 | 64 | end # module Datafile 65 | -------------------------------------------------------------------------------- /datafile/lib/datafile/datafile.rb: -------------------------------------------------------------------------------- 1 | 2 | module Datafile 3 | 4 | 5 | class Datafile 6 | 7 | ## convenience method - use like Datafile.load_file() 8 | def self.load_file( path='./Datafile' ) 9 | code = File.open( path, 'r:utf-8' ) { |f| f.read } 10 | self.load( code ) 11 | end 12 | 13 | ## another convenience method - use like Datafile.load() 14 | def self.load( code ) 15 | builder = Builder.new 16 | builder.instance_eval( code ) 17 | 18 | # Note: return datafile (of course, NOT the builder) 19 | # if you want a builder use Datafile::Builder ;-) 20 | datafile = builder.datafile 21 | datafile 22 | end 23 | 24 | 25 | include LogUtils::Logging 26 | 27 | 28 | def initialize( opts={} ) 29 | @opts = opts 30 | @datasets = [] 31 | 32 | @worker = if opts[:file] 33 | FileWorker.new( self ) 34 | else ## default to zip worker for now 35 | ZipWorker.new( self ) 36 | end 37 | end 38 | 39 | attr_reader :datasets 40 | 41 | 42 | def worker=( value ) # lets you change worker - find a better way - how, why, why not?? 43 | @worker = if value.is_a?( Class ) ## let's you pass in FileWorker or ZipWorker etc. 
44 | value.new( self ) 45 | elsif value.to_sym == :file 46 | FileWorker.new( self ) 47 | elsif value.to_sym == :zip 48 | ZipWorker.new( self ) 49 | else 50 | value 51 | end 52 | end 53 | 54 | 55 | def run 56 | logger.info( "[datafile] begin - run" ) 57 | download # step 1 - download zips for datasets 58 | read # step 2 - read in datasets from zips - note: includes running inlines 59 | logger.info( "[datafile] end - run" ) 60 | end 61 | 62 | 63 | def download 64 | logger.info( "[datafile] dowload" ) 65 | @worker.download 66 | ## check: use @worker.download( @datasets) - why, why not?? link worker w/ datafile - why, why not?? 67 | end 68 | 69 | def read 70 | logger.info( "[datafile] read" ) 71 | @worker.read 72 | end 73 | 74 | def dump 75 | ## for debugging dump datasets (note: will/might also check if zip exits) 76 | logger.info( "[datafile] dump datasets (for debugging)" ) 77 | @worker.dump 78 | end 79 | 80 | end # class Datafile 81 | end # module Datafile 82 | -------------------------------------------------------------------------------- /datafile/lib/datafile/workers/zip/dataset.rb: -------------------------------------------------------------------------------- 1 | 2 | module Datafile 3 | 4 | 5 | class ZipDataset ### use (rename to) ZipDatasetWorker/Helper/Wrapper/Fetcher/Downloader - why, why not ??? 6 | ## read dataset from zip(archive) 7 | 8 | include LogUtils::Logging 9 | 10 | def initialize( dataset ) 11 | @dataset = dataset 12 | end 13 | 14 | def remote_zip_url # remote zip url 15 | "https://github.com/#{@dataset.name}/archive/master.zip" 16 | end 17 | 18 | def local_zip_name 19 | ### note: replace / in name w/ --I-- 20 | ## e.g. 
flatten the filename, that is, do NOT include any folders 21 | @dataset.name.gsub('/', '--I--') # note: will NOT include/return .zip extension 22 | end 23 | 24 | def local_zip_root 25 | "./tmp" 26 | end 27 | 28 | def local_zip_path # local zip path 29 | "#{local_zip_root}/#{local_zip_name}.zip" 30 | end 31 | 32 | 33 | def download 34 | logger.info( "download dataset '#{@dataset.name}'" ) 35 | logger.info( " from '#{remote_zip_url}'" ) 36 | logger.info( " to '#{local_zip_path}'..." ) 37 | 38 | download_blob( remote_zip_url, local_zip_path ) 39 | end 40 | 41 | 42 | def dump 43 | ## for debuggin dump dataset (also check if zip exits) 44 | puts "dataset '#{@dataset.name}' opts=#{@dataset.opts.to_json}" ## use opts.inspect instead of to_json - why? why not? 45 | puts " local '#{local_zip_name}' (#{local_zip_path})" 46 | if File.exist?( local_zip_path ) 47 | puts " size: #{File.size(local_zip_path)} bytes" 48 | else 49 | puts " (file not found)" 50 | end 51 | puts " remote '#{remote_zip_url}'" 52 | end 53 | 54 | def read 55 | if @dataset.is_a?( FootballDataset ) 56 | logger.info( "read football dataset (zip) '#{@dataset.name}', '#{@dataset.setup}'" ) 57 | 58 | pack = SportDb::ZipPackage.new( local_zip_path ) 59 | pack.read( season: @dataset.setup ) ## note: pass on (optional) setup arg as season (filter) arg for now 60 | else 61 | logger.info( "TODO/FIX: read dataset (zip) '#{@dataset.name}', '#{@dataset.setup}'; sorry" ) 62 | end 63 | end 64 | 65 | 66 | private 67 | #### 68 | # download tasks for zips 69 | def download_blob( url, dest ) 70 | logger.info "downloading #{url} to #{dest}..." 71 | 72 | ## make sure dest path exists 73 | dest_p = File.dirname( dest ) 74 | FileUtils.mkdir_p( dest_p ) unless File.exist?( dest_p ) ## use Dir.exist?? why? why not?? 
75 | 76 | worker = Fetcher::Worker.new 77 | worker.copy( url, dest ) 78 | ## print some file stats 79 | logger.debug " size: #{File.size(dest)} bytes" 80 | end 81 | 82 | end # class DatasetZip 83 | 84 | end # module Datafile 85 | -------------------------------------------------------------------------------- /datafile/attic/lib/builder2.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module Datafile 4 | 5 | 6 | ### 7 | ## check/todo: rename to BatchBuilder, MultiBuilder,etc - find better name - why, why not?? 8 | 9 | class BuilderEx 10 | 11 | def self.load_file( path ) 12 | code = File.read_utf8( path ) 13 | self.load( code ) 14 | end 15 | 16 | def self.load( code ) 17 | builder = BuilderEx.new 18 | builder.instance_eval( code ) 19 | builder 20 | end 21 | 22 | 23 | include LogUtils::Logging 24 | 25 | def initialize 26 | @datafiles = [] 27 | @datafile = nil 28 | end 29 | 30 | attr_reader :datafiles 31 | 32 | 33 | def task( arg ) 34 | 35 | logger.info( "[builder] add task '#{arg.inspect}' : #{arg.class.name}" ) 36 | 37 | if arg.kind_of?( String ) || arg.kind_of?( Symbol ) # e.g. 'at' or :at 38 | name = arg.to_s 39 | ## note: always default to FileWorker for now 40 | ## -- use file: true -- find better name e.g. worker/source: file - why? why not?? 41 | @datafile = Datafile.new( name: name, deps: [], file: true ) 42 | yield ### execute block in context 43 | ## b = Builder.new 44 | ## block.call( b ) ## same as b.instance_eval( &block) ??? 45 | ## b.instance_eval( code ) 46 | ## b = Builder.load( &block ) 47 | elsif arg.kind_of?( Hash ) ## Hash e.g. :at_calc => :at etc. 48 | key = arg.keys.first 49 | value = arg[key] ## todo: check if single value? 
always turn into array 50 | 51 | name = key.to_s ## get first key (assume it's name) 52 | if value.kind_of?( Array ) 53 | deps = value.map { |v| v.to_s } ## convert to strings 54 | else ## assume single string/symbol -- convert to array 55 | deps = [value.to_s] 56 | end 57 | @datafile = Datafile.new( name: name, deps: deps, file: true ) ## note: always default to FileWorker for now 58 | yield ### execute block in context 59 | ## to be done 60 | else 61 | ## fix: report error: unknown type 62 | end 63 | 64 | @datafiles << @datafile 65 | end 66 | 67 | def calc( &block ) 68 | logger.info( "[builder] add script calc-block" ) 69 | @datafile.scripts << Script.new( block ) 70 | end 71 | 72 | ################################ 73 | # "classic/standard" datasets 74 | def beer( name, opts={} ) 75 | logger.info( "[builder] add beer-dataset '#{name}'" ) 76 | @datafile.datasets << BeerDataset.new( name, opts ) 77 | end 78 | 79 | def football( name, opts={} ) 80 | logger.info( "[builder] add football-dataset '#{name}'" ) 81 | @datafile.datasets << FootballDataset.new( name, opts ) 82 | end 83 | 84 | def world( name, opts={} ) 85 | logger.info( "[builder] add world-dataset '#{name}'" ) 86 | @datafile.datasets << WorldDataset.new( name, opts ) 87 | end 88 | 89 | end # class Builder2 90 | end # module Datafile 91 | -------------------------------------------------------------------------------- /datafile/NOTES.md: -------------------------------------------------------------------------------- 1 | # Notes 2 | 3 | 4 | ## Misc 5 | 6 | run tests 7 | 8 | ``` 9 | [info] [builder] add world-dataset 'world.db' 10 | [info] [builder] add football-dataset 'national-teams' 11 | [info] [builder] add football-dataset 'world-cup' 12 | [info] [datafile] dump datasets (for debugging) 13 | dataset 'openmundi/world.db' opts={"setup":"countries"} 14 | local 'openmundi--I--world.db' (./tmp/openmundi--I--world.db.zip) 15 | (file not found) 16 | remote 
'http://github.com/openmundi/world.db/archive/master.zip' 17 | dataset 'openfootball/national-teams' opts={} 18 | local 'openfootball--I--national-teams' (./tmp/openfootball--I--national-teams.zip) 19 | (file not found) 20 | remote 'http://github.com/openfootball/national-teams/archive/master.zip' 21 | dataset 'openfootball/world-cup' opts={"setup":"2014"} 22 | local 'openfootball--I--world-cup' (./tmp/openfootball--I--world-cup.zip) 23 | (file not found) 24 | remote 'http://github.com/openfootball/world-cup/archive/master.zip' 25 | [info] [datafile] dump datasets (for debugging) 26 | dataset 'openmundi/world.db' opts={"setup":"countries"} 27 | [error] no file found for 'openmundi/world.db'; expected '../../openmundi/world.db' 28 | repo-dir '' 29 | dataset 'openfootball/national-teams' opts={} 30 | [error] no file found for 'openfootball/national-teams'; expected '../national-teams' 31 | repo-dir '' 32 | dataset 'openfootball/world-cup' opts={"setup":"2014"} 33 | [error] no file found for 'openfootball/world-cup'; expected '../world-cup' 34 | repo-dir '' 35 | . 36 | 37 | Finished in 0.010819s, 92.4314 runs/s, 92.4314 assertions/s. 
38 | 1 runs, 1 assertions, 0 failures, 0 errors, 0 skips 39 | 40 | 41 | [info] [builder] add world-dataset 'world.db' 42 | [info] [builder] add football-dataset 'national-teams' 43 | [info] [builder] add football-dataset 'world-cup' 44 | [info] [datafile] dump datasets (for debugging) 45 | dataset 'openmundi/world.db' opts={"setup":"countries"} 46 | local 'openmundi--I--world.db' (./tmp/openmundi--I--world.db.zip) 47 | (file not found) 48 | remote 'http://github.com/openmundi/world.db/archive/master.zip' 49 | dataset 'openfootball/national-teams' opts={} 50 | local 'openfootball--I--national-teams' (./tmp/openfootball--I--national-teams.zip) 51 | (file not found) 52 | remote 'http://github.com/openfootball/national-teams/archive/master.zip' 53 | dataset 'openfootball/world-cup' opts={"setup":"2014"} 54 | local 'openfootball--I--world-cup' (./tmp/openfootball--I--world-cup.zip) 55 | (file not found) 56 | remote 'http://github.com/openfootball/world-cup/archive/master.zip' 57 | [info] [datafile] dump datasets (for debugging) 58 | dataset 'openmundi/world.db' opts={:setup=>"countries"} 59 | [error] no file found for 'openmundi/world.db'; expected '../../openmundi/world.db' 60 | repo-dir '' 61 | dataset 'openfootball/national-teams' opts={} 62 | [error] no file found for 'openfootball/national-teams'; expected '../national-teams' 63 | repo-dir '' 64 | dataset 'openfootball/world-cup' opts={:setup=>"2014"} 65 | [error] no file found for 'openfootball/world-cup'; expected '../world-cup' 66 | repo-dir '' 67 | . 68 | 69 | Finished in 0.050179s, 19.9287 runs/s, 19.9287 assertions/s. 70 | 1 runs, 1 assertions, 0 failures, 0 errors, 0 skips 71 | ``` -------------------------------------------------------------------------------- /datafile/lib/datafile/dataset.rb: -------------------------------------------------------------------------------- 1 | 2 | ############ 3 | # fix: move global method to ?? 
4 | 5 | def read_known_datasets( path ) 6 | ary = [] 7 | lines = File.open( path, 'r:utf-8' ) {|f| f.read } 8 | lines.each_line do |line| 9 | ## skip blank and comments lines 10 | next if /^\s*#/ =~ line || /^\s*$/ =~ line 11 | 12 | ary << line.strip 13 | end 14 | ary 15 | end 16 | 17 | 18 | module Datafile 19 | 20 | class Dataset 21 | include LogUtils::Logging 22 | 23 | def initialize( name, opts={} ) 24 | @name = name 25 | @opts = opts 26 | end 27 | 28 | attr_reader :name, :opts 29 | 30 | ## convenience helpers for known opts 31 | def setup() @opts[:setup]; end ## note: return nil if not found/set 32 | def format() @opts[:format] || 'txt'; end ## note: assume default is txt (other formats incl. csv) for now - why? wh not? 33 | end # class Dataset 34 | 35 | 36 | class WorldDataset < Dataset 37 | def initialize( name_easy, opts={} ) 38 | 39 | ## check if name include slash (e.g. /) 40 | ## - if not auto-add openmundi/ (default) 41 | if name_easy.index( '/' ).nil? 42 | name = "openmundi/#{name_easy}" 43 | else 44 | name = name_easy ## just pass through for now 45 | end 46 | 47 | super( name, opts ) ## todo/check: just juse super (e.g. pass along all params - why? why not?) 48 | end 49 | end # class WorldDataset 50 | 51 | 52 | 53 | class FootballDataset < Dataset 54 | 55 | def self.build_known_datasets 56 | read_known_datasets( "#{::Datafile.data_path}/football.txt" ) 57 | end 58 | 59 | def self.known_datasets 60 | @@known_football_datasets ||= build_known_datasets 61 | @@known_football_datasets 62 | end 63 | 64 | 65 | def initialize( name_easy, opts={} ) 66 | 67 | ## check if name include slash (e.g. /) 68 | ## - if not auto-add openfootball/ (default) 69 | if name_easy.index( '/' ).nil? 
70 | if opts[:format] == 'csv' 71 | name = "footballcsv/#{name_easy}" 72 | else 73 | name = "openfootball/#{name_easy}" 74 | end 75 | else 76 | name = name_easy ## just pass through for now 77 | end 78 | 79 | super( name, opts ) 80 | 81 | ### check for known datasets; warn: if not known (might be a typo) 82 | unless self.class.known_datasets.include?( name ) 83 | ## todo: use logger - why, why not?? 84 | puts "*** warn: unknown football dataset '#{name}', typo ???" 85 | end 86 | end 87 | end # class FootballDataset 88 | 89 | 90 | class BeerDataset < Dataset 91 | def initialize( name_easy, opts={} ) 92 | 93 | ## check if name include slash (e.g. /) 94 | ## - if not auto-add openbeer/ (default) 95 | if name_easy.index( '/' ).nil? 96 | name = "openbeer/#{name_easy}" 97 | else 98 | name = name_easy ## just pass through for now 99 | end 100 | 101 | super( name, opts ) 102 | end 103 | end # class BeerDataset 104 | end # module Datafile 105 | 106 | 107 | ## note: add "known" dataset classes to global namespace - why? why not? 108 | ## add "global" shortcuts for now 109 | WorldDataset = Datafile::WorldDataset 110 | FootballDataset = Datafile::FootballDataset 111 | BeerDataset = Datafile::BeerDataset 112 | -------------------------------------------------------------------------------- /datafile/attic/lib/datafile.rb: -------------------------------------------------------------------------------- 1 | 2 | class Script 3 | include LogUtils::Logging 4 | 5 | def initialize( proc ) 6 | @proc = proc 7 | end 8 | 9 | def call 10 | logger.info( "[script] calling calc block" ) 11 | @proc.call 12 | end 13 | 14 | def dump 15 | puts " script: #{@proc.inspect}" 16 | end 17 | end ## class Script 18 | 19 | 20 | 21 | ### todo/check: use Script for Inline too?? - why, why not??? 22 | ### - use setup/pre/before and post/after or something?? 
##  - note: for now always is pre/before

## Wraps an inline script block (proc) for running later via call.
class Inline
  include LogUtils::Logging

  def initialize( proc )
    @proc = proc
  end

  ## log and run the wrapped block; returns the result of the block
  def call
    logger.info( "[inline] calling script block" )
    @proc.call
  end

  ## for debugging print the wrapped block
  ##  note: prints the same " script:" label as Script#dump
  def dump
    puts " script: #{@proc.inspect}"
  end
end ## class Inline


## note(review): attic fragment - reads @opts, @datasets and @worker and
##   calls download/read but does not set/define them here;
##   presumably relies on the rest of the Datafile class - confirm before reuse
class Datafile


  ## another convenience method - use like Datafile.load()
  def self.load( code )
    builder = Builder.new
    builder.instance_eval( code )

    # Note: return datafile (of course, NOT the builder)
    #  if you want a builder use Datafile::Builder ;-)
    datafile = builder.datafile
    ## check for auto-configure (just guessing)
    ##   zip or file worker
    datafile.guess_file_or_zip_worker
    datafile
  end


  def guess_file_or_zip_worker   ## change/rename to configure_file_or_zip_worker - why? why not??
    ## if opts file or zip exists do NOT change (assume set manually)
    return if @opts[:file] || @opts[:zip]

    ## for now only change if single (just 1) dataset and it's present
    ##  note(review): file? is not defined on any dataset class in view - confirm
    if @datasets.size == 1 && @datasets[0].file?
      puts " bingo!! assume (in-situ) datafile; use file workers"
      @worker = FileWorker.new( self )
    end
  end



  attr_reader :scripts   ## calc(ulation) scripts (calc blocks)
  attr_reader :inlines   ## inline script blocks -- use before?? run before datasets
  attr_reader :name
  attr_reader :deps      ## dep(endencies)

  ## opts - reads :name (task/target name) and :deps (dependencies)
  def initialize( opts={} )
    @scripts = []   ## calculation scripts (calc blocks)
    @inlines = []   ## inline (setup) scripts (run before reading datasets)

    ## (target)name - return nil if noname (set/defined/assigned)
    @name = opts[:name] || nil
    ## deps (dependencies) - note: always returns an array (empty array if no deps)
    @deps = opts[:deps] || []
  end

  def run
    logger.info( "[datafile] begin - run" )
    download     # step 1 - download zips for datasets
    read         # step 2 - read in datasets from zips  - note: includes running inlines
    calc         # step 3 - run calc(ulations) scripts
    logger.info( "[datafile] end - run" )
  end

  ## run calc(ulation) scripts; gets passed on to the worker
  def calc
    logger.info( "[datafile] calc" )
    @worker.calc
  end

=begin
  def download_world  ## only dl world datasets (skip all others)
    logger.info( "[datafile] download world datasets" )
    @datasets.each do |dataset|
      if dataset.kind_of? WorldDataset
        dataset.download()
      else
        # skip all others
      end
    end
  end

  def download_beer  ## only dl beer datasets (skip all others)
    logger.info( "[datafile] download beer datasets" )
    @datasets.each do |dataset|
      if dataset.kind_of? BeerDataset
        dataset.download()
      else
        # skip all others
      end
    end
  end

  def download_football  ## only dl football datasets (skip all others)
    logger.info( "[datafile] download football datasets" )
    @datasets.each do |dataset|
      if dataset.kind_of? FootballDataset
        dataset.download()
      else
        # skip all others
      end
    end
  end
=end


=begin
  def read_world
    logger.info( "[datafile] read world datasets" )
    @datasets.each do |dataset|
      if dataset.kind_of?( WorldDataset )
        dataset.read()
      else
        # skip all others
      end
    end
  end

  def read_beer
    logger.info( "[datafile] read beer datasets" )
    @datasets.each do |dataset|
      if dataset.kind_of?( BeerDataset )
        dataset.read()
      else
        # skip all others
      end
    end
  end

  def read_football
    logger.info( "[datafile] read football datasets" )
    @datasets.each do |dataset|
      if dataset.kind_of?( FootballDataset )
        dataset.read()
      else
        # skip all others
      end
    end
  end
=end

end
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | 3 | Statement of Purpose 4 | 5 | The laws of most jurisdictions throughout the world automatically confer 6 | exclusive Copyright and Related Rights (defined below) upon the creator and 7 | subsequent owner(s) (each and all, an "owner") of an original work of 8 | authorship and/or a database (each, a "Work"). 9 | 10 | Certain owners wish to permanently relinquish those rights to a Work for the 11 | purpose of contributing to a commons of creative, cultural and scientific 12 | works ("Commons") that the public can reliably and without fear of later 13 | claims of infringement build upon, modify, incorporate in other works, reuse 14 | and redistribute as freely as possible in any form whatsoever and for any 15 | purposes, including without limitation commercial purposes.
These owners may 16 | contribute to the Commons to promote the ideal of a free culture and the 17 | further production of creative, cultural and scientific works, or to gain 18 | reputation or greater distribution for their Work in part through the use and 19 | efforts of others. 20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. 
other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver 76 | is so judged Affirmer hereby grants to each affected person a royalty-free, 77 | non transferable, non sublicensable, non exclusive, irrevocable and 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration 80 | provided by applicable law or treaty (including future time extensions), (iii) 81 | in any current or future medium and for any number of copies, and (iv) for any 82 | purpose whatsoever, including without limitation commercial, advertising or 83 | promotional purposes (the "License"). The License shall be deemed effective as 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the 85 | License for any reason be judged legally invalid or ineffective under 86 | applicable law, such partial invalidity or ineffectiveness shall not 87 | invalidate the remainder of the License, and in such case Affirmer hereby 88 | affirms that he or she will not (i) exercise any of his or her remaining 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims 90 | and causes of action with respect to the Work, in either case contrary to 91 | Affirmer's express Statement of Purpose. 92 | 93 | 4. Limitations and Disclaimers. 94 | 95 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 96 | surrendered, licensed or otherwise affected by this document. 97 | 98 | b. Affirmer offers the Work as-is and makes no representations or warranties 99 | of any kind concerning the Work, express, implied, statutory or otherwise, 100 | including without limitation warranties of title, merchantability, fitness 101 | for a particular purpose, non infringement, or the absence of latent or 102 | other defects, accuracy, or the present or absence of errors, whether or not 103 | discoverable, all to the greatest extent permissible under applicable law. 104 | 105 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 106 | that may apply to the Work or any use thereof, including without limitation 107 | any person's Copyright and Related Rights in the Work. Further, Affirmer 108 | disclaims responsibility for obtaining any necessary consents, permissions 109 | or other rights required for any use of the Work. 110 | 111 | d. Affirmer understands and acknowledges that Creative Commons is not a 112 | party to this document and has no duty or obligation with respect to this 113 | CC0 or use of the Work. 114 | 115 | For more information, please see 116 | 117 | --------------------------------------------------------------------------------