├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .yardopts ├── Appraisals ├── Gemfile ├── LICENSE ├── README.md ├── Rakefile ├── bin └── oai ├── examples ├── models │ └── file_model.rb └── providers │ └── dublin_core.rb ├── gemfiles ├── rails_60.gemfile ├── rails_61.gemfile ├── rails_70.gemfile ├── rails_71.gemfile ├── rails_72.gemfile └── rails_80.gemfile ├── lib ├── oai.rb ├── oai │ ├── client.rb │ ├── client │ │ ├── get_record.rb │ │ ├── header.rb │ │ ├── identify.rb │ │ ├── list_identifiers.rb │ │ ├── list_metadata_formats.rb │ │ ├── list_records.rb │ │ ├── list_sets.rb │ │ ├── metadata_format.rb │ │ ├── record.rb │ │ ├── response.rb │ │ └── resumable.rb │ ├── constants.rb │ ├── exception.rb │ ├── harvester.rb │ ├── harvester │ │ ├── config.rb │ │ ├── harvest.rb │ │ ├── logging.rb │ │ ├── mailer.rb │ │ └── shell.rb │ ├── provider.rb │ ├── provider │ │ ├── metadata_format.rb │ │ ├── metadata_format │ │ │ └── oai_dc.rb │ │ ├── model.rb │ │ ├── model │ │ │ ├── activerecord_caching_wrapper.rb │ │ │ └── activerecord_wrapper.rb │ │ ├── partial_result.rb │ │ ├── response.rb │ │ ├── response │ │ │ ├── error.rb │ │ │ ├── get_record.rb │ │ │ ├── identify.rb │ │ │ ├── list_identifiers.rb │ │ │ ├── list_metadata_formats.rb │ │ │ ├── list_records.rb │ │ │ ├── list_sets.rb │ │ │ └── record_response.rb │ │ └── resumption_token.rb │ ├── set.rb │ └── xpath.rb └── test.rb ├── ruby-oai.gemspec ├── test ├── activerecord_provider │ ├── config │ │ └── connection.rb │ ├── database │ │ └── 0001_oaipmh_tables.rb │ ├── fixtures │ │ └── dc.yml │ ├── helpers │ │ ├── providers.rb │ │ ├── set_provider.rb │ │ └── transactional_test_case.rb │ ├── models │ │ ├── dc_field.rb │ │ ├── dc_lang.rb │ │ ├── dc_set.rb │ │ ├── exclusive_set_dc_field.rb │ │ └── oai_token.rb │ ├── tc_activerecord_wrapper.rb │ ├── tc_ar_provider.rb │ ├── tc_ar_sets_provider.rb │ ├── tc_caching_paging_provider.rb │ ├── tc_simple_paging_provider.rb │ └── test_helper_ar_provider.rb ├── client │ ├── helpers │ │ ├── 
provider.rb │ │ └── test_wrapper.rb │ ├── tc_exception.rb │ ├── tc_get_record.rb │ ├── tc_http_client.rb │ ├── tc_identify.rb │ ├── tc_libxml.rb │ ├── tc_list_identifiers.rb │ ├── tc_list_metadata_formats.rb │ ├── tc_list_records.rb │ ├── tc_list_sets.rb │ ├── tc_low_resolution_dates.rb │ ├── tc_utf8_escaping.rb │ ├── tc_xpath.rb │ └── test_helper_client.rb ├── harvester │ ├── tc_harvest.rb │ └── test_helper_harvester.rb ├── provider │ ├── models.rb │ ├── tc_exceptions.rb │ ├── tc_functional_tokens.rb │ ├── tc_instance_provider.rb │ ├── tc_provider.rb │ ├── tc_resumption_tokens.rb │ ├── tc_simple_provider.rb │ └── test_helper_provider.rb └── test.xml └── tools └── generate_fixtures.rb /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ '*' ] 6 | 7 | jobs: 8 | test_matrix: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | fail-fast: false 12 | matrix: 13 | ruby-version: ['2.7.6', 'jruby-9.4.11.0'] 14 | gemfile: 15 | - rails_60 16 | - rails_61 17 | include: 18 | - ruby-version: 2.7.6 19 | gemfile: rails_70 20 | - ruby-version: 3.1.5 21 | gemfile: rails_71 22 | - ruby-version: 3.2.7 23 | gemfile: rails_72 24 | - ruby-version: 3.3.7 25 | gemfile: rails_80 26 | 27 | 28 | 29 | 30 | env: 31 | RAILS_ENV: test 32 | BUNDLE_GEMFILE: ${{ github.workspace }}/gemfiles/${{ matrix.gemfile }}.gemfile 33 | 34 | steps: 35 | - uses: actions/checkout@v2 36 | - name: Add --no-document option to .gemrc file to speed up bundle install 37 | run: "echo 'gem: --no-document' > ~/.gemrc" 38 | - name: Set up Ruby 39 | uses: ruby/setup-ruby@v1 40 | with: 41 | ruby-version: ${{ matrix.ruby-version }} 42 | bundler-cache: true # runs 'bundle install' and caches installed gems automatically 43 | - name: Run CI task 44 | run: bundle exec rake 45 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | /pkg/ 2 | /doc/ 3 | /coverage* 4 | /tmp/ 5 | /.yardoc/ 6 | .DS_Store 7 | # Exclude Gemfile.lock (best practice for gems) 8 | Gemfile.lock 9 | 10 | # Exclude appraisal .locks. per appraisal readme advice 11 | /gemfiles/*.lock 12 | -------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | -m markdown 2 | -------------------------------------------------------------------------------- /Appraisals: -------------------------------------------------------------------------------- 1 | # https://github.com/thoughtbot/appraisal 2 | 3 | appraise "rails-60" do 4 | gem 'activerecord', '~> 6.0.0' 5 | 6 | # https://github.com/rails/rails/issues/54271 7 | gem "concurrent-ruby", "< 1.3.5" 8 | 9 | gem 'sqlite3', ">= 1.4.0", "< 2", :platform => [:ruby, :mswin] 10 | end 11 | 12 | appraise "rails-61" do 13 | gem 'activerecord', '~> 6.1.0' 14 | 15 | # https://github.com/rails/rails/issues/54271 16 | gem "concurrent-ruby", "< 1.3.5" 17 | 18 | gem 'sqlite3', ">= 1.4.0", "< 2", :platform => [:ruby, :mswin] 19 | end 20 | 21 | appraise "rails-70" do 22 | gem 'activerecord', '~> 7.0.0' 23 | 24 | # https://github.com/rails/rails/issues/54271 25 | gem "concurrent-ruby", "< 1.3.5" 26 | 27 | gem 'sqlite3', ">= 1.4.0", "< 2", :platform => [:ruby, :mswin] 28 | end 29 | 30 | appraise "rails-71" do 31 | gem 'activerecord', '~> 7.1.0' 32 | 33 | gem 'sqlite3', ">= 1.4.0", "< 3.0", :platform => [:ruby, :mswin] 34 | end 35 | 36 | appraise "rails-72" do 37 | gem 'activerecord', '~> 7.2.0' 38 | 39 | gem 'sqlite3', ">= 1.4.0", "< 3.0", :platform => [:ruby, :mswin] 40 | end 41 | 42 | appraise "rails-80" do 43 | gem 'activerecord', '~> 8.0.0' 44 | 45 | gem 'sqlite3', ">= 1.4.0", "< 3.0", :platform => [:ruby, :mswin] 46 | end 47 | 
# Fetch gems over TLS; a plain-http source lets gem downloads be tampered with
# in transit and triggers a Bundler warning.
source "https://rubygems.org"

# Pull in the dependencies declared by the gemspec in this directory.
gemspec

gem 'jruby-openssl', :platform => :jruby

group :test do
  gem 'activerecord-jdbcsqlite3-adapter', :platform => [:jruby]
  gem 'libxml-ruby', :platform => [:ruby, :mswin]
  gem 'rake'
  gem 'yard'
  gem 'redcarpet', :platform => :ruby # For fast, Github-like Markdown
  gem 'kramdown', :platform => :jruby # For Markdown without a C compiler
  gem 'test-unit'
end
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ruby-oai 2 | ======== 3 | [![Build Status](https://github.com/code4lib/ruby-oai/workflows/CI/badge.svg)](https://github.com/code4lib/ruby-oai/actions) 4 | 5 | [![Gem Version](https://badge.fury.io/rb/oai.svg)](https://badge.fury.io/rb/oai) 6 | 7 | ruby-oai is an Open Archives Initiative Protocol for Metadata Harvesting (OAI-PMH) 8 | library for Ruby. [OAI-PMH](http://openarchives.org) is a somewhat 9 | archaic protocol for sharing metadata between digital library repositories. 10 | If you are looking to share metadata on the web you are probably better off 11 | using a feed format like [RSS](http://www.rssboard.org/rss-specification) or 12 | [Atom](http://www.atomenabled.org/). If you have to work with a backwards 13 | digital repository that only offers OAI-PMH access then ruby-oai is your 14 | friend. 15 | 16 | The [OAI-PMH](http://openarchives.org) spec defines six verbs 17 | (`Identify`, `ListIdentifiers`, `ListRecords`, 18 | `GetRecord`, `ListSets`, `ListMetadataFormats`) used for discovery and sharing of 19 | metadata. 20 | 21 | The ruby-oai gem includes a client library, a server/provider library and 22 | an interactive harvesting shell. 23 | 24 | Client 25 | ------ 26 | 27 | The OAI client library is used for harvesting metadata from repositories. 
28 | For example to initiate a ListRecords request to pubmed you can: 29 | 30 | ```ruby 31 | require 'oai' 32 | client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi', :headers => { "From" => "oai@example.com" } 33 | response = client.list_records 34 | # Get the first page of records 35 | response.each do |record| 36 | puts record.metadata 37 | end 38 | # Get the second page of records 39 | response = client.list_records(:resumption_token => response.resumption_token) 40 | response.each do |record| 41 | puts record.metadata 42 | end 43 | # Get all pages together (may take a *very* long time to complete) 44 | client.list_records.full.each do |record| 45 | puts record.metadata 46 | end 47 | ``` 48 | 49 | ### Retry-After 50 | This library depends on faraday, but allows a wide range of versions. Depending on the client application's installed version of faraday, there may be different middleware libraries required to support automatically retrying requests that are rate limited/denied with a `Retry-After` header. The OAI client can, however, accept an externally configured faraday http client for handling this. For example, to retry on `429 Too Many Requests`: 51 | 52 | ```ruby 53 | require 'oai' 54 | require 'faraday_middleware' # if using faraday version < 2 55 | http_client = Faraday.new do |conn| 56 | conn.request(:retry, max: 5, retry_statuses: 429) 57 | conn.response(:follow_redirects, limit: 5) 58 | conn.adapter :net_http 59 | end 60 | client = OAI::Client.new(base_url, http: http_client) 61 | opts = {from:'2012-03-01', until:'2012-04-01', metadata_prefix:'oai_dc'} 62 | puts client.list_records(opts).full.count 63 | ``` 64 | 65 | See {OAI::Client} for more details 66 | 67 | Server 68 | ------ 69 | 70 | The OAI provider library handles serving local content to other clients. 
Here's how to set up a simple provider: 71 | 72 | ```ruby 73 | class MyProvider < OAI::Provider::Base 74 | repository_name 'My little OAI provider' 75 | repository_url 'http://localhost/provider' 76 | record_prefix 'oai:localhost' 77 | admin_email 'root@localhost' # String or Array 78 | source_model MyModel.new # Subclass of OAI::Provider::Model 79 | end 80 | ``` 81 | 82 | See comment docs at top of [OAI::Provider](./lib/oai/provider.rb) for more details, including discussion of the `OAI::Provider::ActiveRecordWrapper` class for quick setup of an OAI provider for an ActiveRecord model class (single database table). 83 | 84 | Interactive Harvester 85 | --------------------- 86 | 87 | The OAI-PMH client shell allows OAI Harvesting to be configured in an interactive manner. Typing `oai` on the command line starts the shell. After initial configuration, the shell can be used to manage harvesting operations. 88 | 89 | See {OAI::Harvester::Shell} for more details 90 | 91 | Installation 92 | ------------ 93 | 94 | Normally the best way to install oai is as part of your `Gemfile`: 95 | 96 | source 'https://rubygems.org' 97 | gem 'oai' 98 | 99 | Alternately it can be installed globally using RubyGems: 100 | 101 | $ gem install oai 102 | 103 | Running tests 104 | ------------- 105 | 106 | Tests are written with Test::Unit, in a somewhat archaic/legacy style. Test setup especially is not how we would do things today. Run all tests with: 107 | 108 | $ bundle exec rake test 109 | 110 | There are also convenience tasks to run subsets of tests. 111 | 112 | We use [appraisal](https://github.com/thoughtbot/appraisal) to test ActiveRecord-related functionality under multiple versions of ActiveRecord. 
While the above commands will test with the latest ActiveRecord (allowed in our .gemspec development dependency), you can test under a particular version defined in the [Appraisals](./Appraisals) file like so: 113 | 114 | $ bundle exec appraisal rails-60 rake test 115 | $ bundle exec appraisal rails-70 rake test 116 | 117 | If you run into trouble with appraisal's gemfiles getting out of date and bundler complaining, 118 | try: 119 | 120 | $ bundle exec appraisal clean 121 | $ bundle exec appraisal generate 122 | 123 | That may make changes to appraisal gemfiles that you should commit to the repo. 124 | 125 | License 126 | ------- 127 | 128 | [MIT](./LICENSE) 129 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | 2 | require 'rubygems' 3 | require 'rake' 4 | begin 5 | require 'bundler/setup' 6 | rescue LoadError 7 | puts 'You must `gem install bundler` and `bundle install` to run rake tasks' 8 | end 9 | 10 | Bundler::GemHelper.install_tasks 11 | 12 | require 'rake/testtask' 13 | require 'yard' 14 | 15 | task :default => ["test", "yard"] 16 | 17 | Rake::TestTask.new('test') do |t| 18 | t.description = "Run all Test::Unit tests" 19 | 20 | t.libs << ['lib', 'test/client', 'test/provider', 'test/activerecord_provider'] 21 | 22 | t.pattern = 'test/{client,provider,activerecord_provider}/tc_*.rb' 23 | #t.verbose = true 24 | t.warning = false 25 | end 26 | 27 | 28 | # To run just subsets of tests 29 | namespace :test do 30 | Rake::TestTask.new('client') do |t| 31 | t.libs << ['lib', 'test/client'] 32 | t.pattern = 'test/client/tc_*.rb' 33 | #t.verbose = true 34 | t.warning = false 35 | end 36 | 37 | Rake::TestTask.new('harvester') do |t| 38 | t.libs << ['lib', 'test/harvester'] 39 | t.pattern = 'test/harvester/tc_*.rb' 40 | #t.verbose = true 41 | t.warning = false 42 | end 43 | 44 | Rake::TestTask.new('provider') do |t| 45 | t.libs << ['lib', 'test/provider'] 46 | 
#!/usr/bin/env ruby
#
# Created by William Groppe on 2006-11-05.
# Copyright (c) 2006. All rights reserved.
#
# Command-line entry point for the OAI harvester. This first part of the
# script loads the harvester configuration and declares the command-line
# switches; argument parsing and start-up follow below.
#
# NOTE: the shebang intentionally carries no "-rubygems" switch. That switch
# loaded "ubygems.rb", which modern Ruby no longer ships, and `env` on Linux
# passes "ruby -rubygems" as a single program name anyway.

require 'optparse'

require 'oai/harvester'

include OAI::Harvester

conf = OAI::Harvester::Config.load

# Start-up mode; switched to :daemon by -D/--daemon.
startup = :interactive

# When true, libxml is not loaded even if it is installed (-R/--rexml).
rexml = false

# The block parameter is named `parser` so it does not shadow the outer `opts`
# local that receives the finished OptionParser.
opts = OptionParser.new do |parser|
  parser.banner = "Usage: oai ..."
  parser.define_head "#{File.basename($0)}, a OAI harvester shell."
  parser.separator ""
  parser.separator "Options:"

  parser.on("-D", "--daemon", "Non-interactive mode, to be called via scheduler") { startup = :daemon }
  parser.on("-R", "--rexml", "Use rexml even if libxml is available") { rexml = true }
  parser.on("-?", "--help", "Show this message") do
    puts parser
    exit
  end

  # Another typical switch to print the version.
  parser.on_tail("-v", "--version", "Show version") do
    # Gem.loaded_specs is a public RubyGems reader, so no singleton patching is
    # needed. The spec is absent when the script is run without the gem being
    # activated; report that instead of raising on nil.
    spec = Gem.loaded_specs['oai']
    puts(spec ? spec.version : 'unknown')
    exit
  end
end
# Looks up records in the directory served by this model.
#
# selector - :all to list every "*.xml" file in @directory (sorted by path),
#            or the identifier (bare file name) of a single record.
# opts     - Hash of optional bounds, honoured only for :all:
#            :from  - skip files whose mtime (UTC) is earlier than this time.
#            :until - skip files whose mtime (UTC) is later than this time.
#            A missing or nil bound leaves that side unbounded.
#
# Returns an Array of open File objects for :all (never containing nil), a
# single open File for an identifier, or nil when selector is nil or no such
# file exists. Callers own the returned File handles.
def find(selector, opts = {})
  return nil unless selector

  unless selector == :all
    # Record ids are bare file names (see File#id), so drop any directory
    # components: a crafted identifier such as "../../etc/passwd" must not be
    # able to escape @directory.
    path = File.join(@directory, File.basename(selector.to_s))
    return File.file?(path) ? File.new(path) : nil
  end

  # Accept anything time-like; objects exposing #to_time are converted first.
  as_time = ->(bound) { bound && (bound.respond_to?(:to_time) ? bound.to_time : bound) }
  lower = as_time.call(opts[:from])
  upper = as_time.call(opts[:until])

  Dir["#{@directory}/*.xml"].sort.each_with_object([]) do |file, records|
    mtime = File.stat(file).mtime.utc
    next if lower && mtime < lower
    next if upper && mtime > upper

    records << File.new(file)
  end
end
# Schema migration for version 1.0: creates the object table, the field table
# and the join table linking the two, plus their indexes.
class CreateTheBasics < V 1.0
  # Creates the three tables (dropping any existing ones, via :force) and adds
  # the lookup indexes.
  def self.up
    create_table :dublincore_objs, :force => true do |t|
      t.column :source, :string
      t.column :created_at, :datetime
      t.column :updated_at, :datetime
    end

    # Pure join table, hence no primary key column.
    create_table :dublincore_field_links, :id => false, :force => true do |t|
      t.column :obj_id, :integer, :null => false
      t.column :field_id, :integer, :null => false
    end

    create_table :dublincore_fields, :force => true do |t|
      t.column :field_type, :string, :limit => 30, :null => false
      t.column :value, :text, :null => false
    end

    # The option key is :unique; the previous :uniq spelling is not a
    # recognised add_index option, so the index was never created as unique.
    add_index :dublincore_fields, [:field_type, :value], :unique => true
    add_index :dublincore_field_links, :field_id
    add_index :dublincore_field_links, [:obj_id, :field_id]
  end

  # Drops every table created by .up.
  def self.down
    drop_table :dublincore_objs
    drop_table :dublincore_field_links
    drop_table :dublincore_fields
  end
end
# Controller for editing a catalog object: GET renders the edit form, POST
# applies the action chosen on that form ('Save', 'Discard' or 'Delete').
class Edit < R '/edit', '/edit/(\d+)'
  # Renders the edit form for the object with the given id.
  def get obj_id
    @obj = Obj.find obj_id
    render :edit
  end

  # Dispatches on the submitted `action` value:
  #   'Save'    - replaces the object's fields with the submitted values, then
  #               redirects to its View page.
  #   'Discard' - leaves fields untouched, destroys the object if it has no
  #               fields, then redirects to View (or Index if destroyed).
  #   'Delete'  - destroys the object and renders :delete_success.
  # Any other action value falls through and does nothing.
  def post
    case input.action
    when 'Save'
      @obj = Obj.find input.obj_id
      @obj.fields.clear

      # Parameter names come straight from the request, so only names of the
      # known DC* field models are ever turned into constants. A loose regex
      # match followed by constantize would let a client pick arbitrary
      # classes.
      field_models = DublinCore::FIELDS.map do |field|
        "DublinCore::Models::DC#{field.capitalize}"
      end

      input.keys.each do |key|
        next unless field_models.include?(key)
        next unless input[key] && !input[key].empty?

        # Array() accepts both a single submitted value and a list of them;
        # String#to_a is not available on current Rubies.
        Array(input[key]).each do |value|
          @obj.fields << key.constantize.find_or_create_by_value(value)
        end
      end
      redirect View, @obj
    when 'Discard'
      @obj = Obj.find input.obj_id

      # Get rid of completely empty records
      @obj.destroy if @obj.fields.empty?

      if Obj.exists?(@obj.id)
        redirect View, @obj
      else
        redirect Index
      end
    when 'Delete'
      Obj.find(input.obj_id).destroy
      render :delete_success
    end
  end
end
242 | model = "DublinCore::Models::#{input.field_type}".constantize 243 | obj = Obj.find(input.obj_id) 244 | obj.fields << model.find_or_create_by_value(input.field_value) 245 | end 246 | redirect Edit, input.obj_id 247 | end 248 | end 249 | 250 | class Style < R '/styles.css' 251 | def get 252 | @headers["Content-Type"] = "text/css; charset=utf-8" 253 | @body = %{ 254 | body { width: 750px; margin: 0; margin-left: auto; margin-right: auto; padding: 0; 255 | color: black; background-color: white; } 256 | a { color: #CC6600; text-decoration: none; } 257 | a:visited { color: #CC6600; text-decoration: none;} 258 | a:hover { text-decoration: underline; } 259 | a.stealthy { color: black; } 260 | a.stealthy:visited { color: black; } 261 | .header { text-align: right; padding-right: .5em; } 262 | div.search { text-align: right; position: relative; top: -1em; } 263 | div.search form input { margin-right: .25em; } 264 | .small { font-size: 70%; } 265 | .tiny { font-size: 60%; } 266 | .totals { font-size: 60%; margin-left: .25em; vertical-align: super; } 267 | .field_labels { font-size: 60%; margin-left: 1em; vertical-align: super; } 268 | h2 {color: #CC6600; padding: 0; margin-bottom: .15em; font-size: 160%;} 269 | h3.header { padding:0; margin:0; position: relative; top: -2.8em; 270 | padding-bottom: .25em; padding-right: 5em; font-size: 80%; } 271 | h1.header a { color: #FF9900; text-decoration: none; 272 | font: bold 250% "Trebuchet MS",Trebuchet,Georgia, Serif; 273 | letter-spacing:-4px; } 274 | 275 | div.pagination { text-align: center; } 276 | ul.pages { list-style: none; padding: 0; display: inline;} 277 | ul.pages li { display: inline; } 278 | form.controls { text-align: right; } 279 | ul.undecorated { list-style: none; padding-left: 1em; margin-bottom: 5em;} 280 | .content { padding-left: 2em; padding-right: 2em; } 281 | table { padding: 0; background-color: #CCEECC; font-size: 75%; 282 | width: 100%; border: 1px solid black; margin: 1em; margin-left: auto; 
# Computes which page numbers the pagination bar should list.
#
# Reads @page (the current page) and @total_pages. With fewer than nine pages
# every page is listed. Otherwise the result is a run of up to ten consecutive
# pages positioned around the current page and shifted as needed so that it
# never starts below 1 or ends beyond @total_pages.
#
# Returns a Range of Integers.
def page_window
  return 1..@total_pages if @total_pages < 9

  span = 9
  # Start four pages before the current one, but never before page 1.
  first = [@page - span / 2, 1].max
  # Near the end, slide the window back so it finishes on the last page. The
  # lower clamp matters when there are exactly nine pages, where the window
  # would otherwise start at page 0.
  first = [@total_pages - span, 1].max if first + span > @total_pages
  first..[first + span, @total_pages].min
end
392 | _form(@obj, :action => R(Edit, @obj)) 393 | end 394 | 395 | def records 396 | @objs.each { |obj| _obj(obj) } 397 | paginate(LinkedTo, @field) if @count > DublinCore::LIMIT 398 | end 399 | 400 | def _obj(obj, edit = false) 401 | table.obj(:cellspacing => 0) do 402 | _edit_controls(obj, edit) 403 | DublinCore::FIELDS.each do |field| 404 | obj.send(field.pluralize.intern).each_with_index do |value, index| 405 | tr do 406 | td.label { 0 == index ? "#{field}(s)" : " " } 407 | if edit 408 | td.value do 409 | input :name => value.class, 410 | :type => 'text', 411 | :value => value.to_s 412 | end 413 | else 414 | td.value { a.stealthy(value, :href => R(LinkedTo, value.id)) } 415 | end 416 | end 417 | end 418 | end 419 | end 420 | end 421 | 422 | def _form(obj, action) 423 | form.controls(:method => 'post', :action => R(Edit)) do 424 | input :type => 'hidden', :name => 'obj_id', :value => obj.id 425 | _obj(obj, true) 426 | input.button :type => :submit, :name => 'action', :value => 'Save' 427 | input.button :type => :submit, :name => 'action', :value => 'Discard' 428 | end 429 | form(:method => 'post', :action => R(DataAdd)) do 430 | input :type => 'hidden', :name => 'obj_id', :value => obj.id 431 | table.add :cellspacing => 0 do 432 | tr.controls do 433 | td(:colspan => 3) { "Add an entry. (All changes above will be lost, so save them first)" } 434 | end 435 | tr do 436 | td do 437 | select(:name => 'field_type') do 438 | DublinCore::FIELDS.each do |field| 439 | option field, :value => "DC#{field.capitalize}" 440 | end 441 | end 442 | end 443 | td.value { input :name => 'field_value', :type => 'text' } 444 | td { input.button :type => 'submit', :value => 'Add' } 445 | end 446 | end 447 | end 448 | end 449 | 450 | def _edit_controls(obj, edit) 451 | tr.controls do 452 | td :colspan => 2 do 453 | edit ? 
input(:type => 'submit', :name => 'action', :value => 'Delete') : 454 | a('edit', :href => R(Edit, obj)) 455 | end 456 | end 457 | end 458 | 459 | 460 | def _key_value(key, value) 461 | if value > 0 462 | if key.kind_of?(DublinCore::Models::Field) 463 | a(key, :href => R(LinkedTo, key.id)) 464 | else 465 | a(key.to_s, :href => R(Index, key)) 466 | end 467 | span.totals "(#{value})" 468 | else 469 | span key 470 | span.totals "(#{value})" 471 | end 472 | end 473 | 474 | end 475 | -------------------------------------------------------------------------------- /gemfiles/rails_60.gemfile: -------------------------------------------------------------------------------- 1 | # This file was generated by Appraisal 2 | 3 | source "http://rubygems.org" 4 | 5 | gem "jruby-openssl", platform: :jruby 6 | gem "activerecord", "~> 6.0.0" 7 | gem "concurrent-ruby", "< 1.3.5" 8 | gem "sqlite3", ">= 1.4.0", "< 2", platform: [:ruby, :mswin] 9 | 10 | group :test do 11 | gem "activerecord-jdbcsqlite3-adapter", platform: [:jruby] 12 | gem "libxml-ruby", platform: [:ruby, :mswin] 13 | gem "rake" 14 | gem "yard" 15 | gem "redcarpet", platform: :ruby 16 | gem "kramdown", platform: :jruby 17 | gem "test-unit" 18 | end 19 | 20 | gemspec path: "../" 21 | -------------------------------------------------------------------------------- /gemfiles/rails_61.gemfile: -------------------------------------------------------------------------------- 1 | # This file was generated by Appraisal 2 | 3 | source "http://rubygems.org" 4 | 5 | gem "jruby-openssl", platform: :jruby 6 | gem "activerecord", "~> 6.1.0" 7 | gem "concurrent-ruby", "< 1.3.5" 8 | gem "sqlite3", ">= 1.4.0", "< 2", platform: [:ruby, :mswin] 9 | 10 | group :test do 11 | gem "activerecord-jdbcsqlite3-adapter", platform: [:jruby] 12 | gem "libxml-ruby", platform: [:ruby, :mswin] 13 | gem "rake" 14 | gem "yard" 15 | gem "redcarpet", platform: :ruby 16 | gem "kramdown", platform: :jruby 17 | gem "test-unit" 18 | end 19 | 20 | gemspec path: 
"../" 21 | -------------------------------------------------------------------------------- /gemfiles/rails_70.gemfile: -------------------------------------------------------------------------------- 1 | # This file was generated by Appraisal 2 | 3 | source "http://rubygems.org" 4 | 5 | gem "jruby-openssl", platform: :jruby 6 | gem "activerecord", "~> 7.0.0" 7 | gem "concurrent-ruby", "< 1.3.5" 8 | gem "sqlite3", ">= 1.4.0", "< 2", platform: [:ruby, :mswin] 9 | 10 | group :test do 11 | gem "activerecord-jdbcsqlite3-adapter", platform: [:jruby] 12 | gem "libxml-ruby", platform: [:ruby, :mswin] 13 | gem "rake" 14 | gem "yard" 15 | gem "redcarpet", platform: :ruby 16 | gem "kramdown", platform: :jruby 17 | gem "test-unit" 18 | end 19 | 20 | gemspec path: "../" 21 | -------------------------------------------------------------------------------- /gemfiles/rails_71.gemfile: -------------------------------------------------------------------------------- 1 | # This file was generated by Appraisal 2 | 3 | source "http://rubygems.org" 4 | 5 | gem "jruby-openssl", platform: :jruby 6 | gem "activerecord", "~> 7.1.0" 7 | gem "sqlite3", ">= 1.4.0", "< 3.0", platform: [:ruby, :mswin] 8 | 9 | group :test do 10 | gem "activerecord-jdbcsqlite3-adapter", platform: [:jruby] 11 | gem "libxml-ruby", platform: [:ruby, :mswin] 12 | gem "rake" 13 | gem "yard" 14 | gem "redcarpet", platform: :ruby 15 | gem "kramdown", platform: :jruby 16 | gem "test-unit" 17 | end 18 | 19 | gemspec path: "../" 20 | -------------------------------------------------------------------------------- /gemfiles/rails_72.gemfile: -------------------------------------------------------------------------------- 1 | # This file was generated by Appraisal 2 | 3 | source "http://rubygems.org" 4 | 5 | gem "jruby-openssl", platform: :jruby 6 | gem "activerecord", "~> 7.2.0" 7 | gem "sqlite3", ">= 1.4.0", "< 3.0", platform: [:ruby, :mswin] 8 | 9 | group :test do 10 | gem "activerecord-jdbcsqlite3-adapter", platform: 
[:jruby] 11 | gem "libxml-ruby", platform: [:ruby, :mswin] 12 | gem "rake" 13 | gem "yard" 14 | gem "redcarpet", platform: :ruby 15 | gem "kramdown", platform: :jruby 16 | gem "test-unit" 17 | end 18 | 19 | gemspec path: "../" 20 | -------------------------------------------------------------------------------- /gemfiles/rails_80.gemfile: -------------------------------------------------------------------------------- 1 | # This file was generated by Appraisal 2 | 3 | source "http://rubygems.org" 4 | 5 | gem "jruby-openssl", platform: :jruby 6 | gem "activerecord", "~> 8.0.0" 7 | gem "sqlite3", ">= 1.4.0", "< 3.0", platform: [:ruby, :mswin] 8 | 9 | group :test do 10 | gem "activerecord-jdbcsqlite3-adapter", platform: [:jruby] 11 | gem "libxml-ruby", platform: [:ruby, :mswin] 12 | gem "rake" 13 | gem "yard" 14 | gem "redcarpet", platform: :ruby 15 | gem "kramdown", platform: :jruby 16 | gem "test-unit" 17 | end 18 | 19 | gemspec path: "../" 20 | -------------------------------------------------------------------------------- /lib/oai.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'date' 3 | 4 | # Sub projects (client, provider) require their own libraries so the user 5 | # can selectively load them. 
module OAI

  # A `OAI::Client` provides a client API for issuing OAI-PMH verbs against
  # an OAI-PMH server. The six OAI-PMH verbs translate directly to methods
  # you can call on a `OAI::Client` object. Verb arguments are passed as a
  # hash:
  #
  # ```ruby
  # client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
  # record = client.get_record :identifier => 'oai:pubmedcentral.gov:13901'
  # for identifier in client.list_identifiers
  #   puts identifier
  # end
  # ```
  #
  # The API uses method and parameter names with underscores rather than
  # studly caps, so `list_identifiers` and `metadata_prefix` are used instead
  # of the `listIdentifiers` and `metadataPrefix` spelling of the OAI-PMH
  # specification.
  #
  # The `:from` and `:until` arguments should be passed as `Date`, `Time` or
  # `DateTime` objects, depending on the granularity supported by the server.
  #
  # For detailed information on the arguments that can be used please consult
  # the OAI-PMH specification at
  # http://www.openarchives.org/OAI/openarchivesprotocol.html
  class Client
    # Matches an ampersand that does not already start one of the five
    # predefined XML entities or a decimal/hexadecimal character reference.
    UNESCAPED_AMPERSAND = /&(?!(?:amp|lt|gt|quot|apos|\#\d+|\#x\h+);)/

    # Builds a client for the OAI-PMH service at +base_url+.
    #
    #     client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
    #
    # Recognised +options+:
    #
    # * `:debug`     - print debugging messages on `STDERR` (default false)
    # * `:parser`    - `'rexml'` (default) or `'libxml'`; selects the type of
    #                  the parsed documents handed to the response classes
    # * `:headers`   - extra HTTP headers merged into every request
    # * `:http`      - an already configured Faraday connection to use
    #                  instead of the default one
    # * `:redirects` - `true` (default, meaning 5), a number of redirects to
    #                  follow, or a falsy value to disable redirect handling
    #
    # Raises OAI::Exception when the parser is unknown or when libxml was
    # requested but cannot be loaded.
    def initialize(base_url, options = {})
      @base = URI.parse base_url
      @debug = options.fetch(:debug, false)
      @parser = options.fetch(:parser, 'rexml')
      @headers = options.fetch(:headers, {})

      @http_client = options.fetch(:http) do
        Faraday.new(:url => @base.clone) do |builder|
          follow_redirects = options.fetch(:redirects, true)
          follow_redirects = 5 if follow_redirects == true

          if follow_redirects
            require 'faraday/follow_redirects'
            builder.use Faraday::FollowRedirects::Middleware
            builder.response :follow_redirects, :limit => follow_redirects.to_i
          end
          builder.adapter :net_http
        end
      end

      # load appropriate parser
      case @parser
      when 'libxml'
        begin
          require 'rubygems'
          require 'xml/libxml'
        rescue LoadError, StandardError
          # LoadError is not a StandardError, so it has to be named
          # explicitly for a missing gem to be reported as an OAI::Exception.
          raise OAI::Exception.new("xml/libxml not available")
        end
      when 'rexml'
        require 'rexml/document'
        require 'rexml/xpath'
      else
        raise OAI::Exception.new("unknown parser: #{@parser}")
      end
    end

    # Equivalent to an `Identify` request. Returns an `OAI::IdentifyResponse`
    # wrapping the parsed response document.
    def identify
      OAI::IdentifyResponse.new(do_request('Identify'))
    end

    # Equivalent to a `ListMetadataFormats` request.
    # Returns an `OAI::ListMetadataFormatsResponse`.
    def list_metadata_formats(opts = {})
      OAI::ListMetadataFormatsResponse.new(do_request('ListMetadataFormats', opts))
    end

    # Equivalent to a `ListIdentifiers` request. Pass `:from` and `:until`
    # as date-like objects appropriate to the granularity supported by the
    # server.
    #
    # Seamless resumption is available through `#full` on the response:
    #
    #     client.list_identifiers.full.count # Don't try this on PubMed though!
    #
    def list_identifiers(opts = {})
      do_resumable(OAI::ListIdentifiersResponse, 'ListIdentifiers', opts)
    end

    # Equivalent to a `GetRecord` request. You must supply an `:identifier`
    # argument. Returns an `OAI::GetRecordResponse`.
    def get_record(opts = {})
      OAI::GetRecordResponse.new(do_request('GetRecord', opts))
    end

    # Equivalent to the `ListRecords` request. Returns an
    # `OAI::ListRecordsResponse` which can be iterated:
    #
    #     client.list_records.each do |record|
    #       puts record.metadata
    #     end
    #
    # Use seamless resumption to avoid handling resumption tokens yourself:
    #
    #     client.list_records.full.each do |record|
    #       puts record.metadata
    #     end
    #
    # ### Memory Use
    # `#full` fetches one page at a time, but you can use it in ways that
    # defeat that. Avoid `client.list_records.full.entries` unless you really
    # want to hold every record of the feed in memory.
    def list_records(opts = {})
      do_resumable(OAI::ListRecordsResponse, 'ListRecords', opts)
    end

    # Equivalent to the `ListSets` request. Returns an `OAI::ListSetsResponse`
    # which can be iterated, optionally through `#full`:
    #
    #     client.list_sets.full.each do |set|
    #       puts set
    #     end
    def list_sets(opts = {})
      do_resumable(OAI::ListSetsResponse, 'ListSets', opts)
    end

    # Cleans a raw response body before it is handed to the XML parser:
    # invalid byte sequences become `?` and bare ampersands are escaped.
    # With the libxml parser the default OAI-PMH namespace declaration is
    # removed as well. Returns nil when +xml+ is nil.
    def sanitize_xml(xml)
      return nil if xml.nil?

      xml = strip_invalid_utf_8_chars(xml)
      xml = strip_invalid_xml_chars(xml)
      if @parser == 'libxml'
        # remove default namespace for oai-pmh since libxml
        # isn't able to use our xpaths to get at them
        # if you know a way around this please let me know
        xml = xml.gsub(%r{xmlns="http://www.openarchives.org/OAI/.\../"}, '')
      end
      xml
    end

    private

    # Fires off the request for +verb+ and returns the parsed document.
    def do_request(verb, opts = nil)
      load_document(get(build_uri(verb, opts)))
    end

    # Issues +verb+ and wraps the result in +response_class+, handing it a
    # block that fetches the following page from a response's resumption
    # token.
    def do_resumable(response_class, verb, opts)
      response_class.new(do_request(verb, opts)) do |response|
        response_class.new \
          do_request(verb, :resumption_token => response.resumption_token)
      end
    end

    # Returns a copy of the base URI carrying the query string for +verb+
    # and +opts+. A copy is used so the client's base URI is never modified.
    def build_uri(verb, opts)
      opts = validate_options(verb, opts)
      query = "verb=#{verb}"
      opts.each_pair { |k, v| query << '&' << externalize(k) << '=' << encode(v) }
      uri = @base.dup
      uri.query = query
      uri
    end

    # Serialises one argument value for the query string. Date-like values
    # are rendered in the OAI-PMH date formats; anything else is URL-escaped.
    def encode(value)
      return CGI.escape(value.to_s) unless value.respond_to?(:strftime)
      if value.kind_of?(DateTime)
        # to_time keeps the DateTime's own offset; getutc does not mutate.
        value.to_time.getutc.xmlschema
      elsif value.kind_of?(Time)
        # getutc returns a new Time, leaving the caller's object untouched.
        value.getutc.xmlschema
      else # Assume something date like
        value.strftime('%Y-%m-%d')
      end
    end

    # Parses +xml+ with the configured parser. Raises OAI::Exception when
    # the response is not well-formed.
    def load_document(xml)
      xml = sanitize_xml(xml)
      case @parser
      when 'libxml'
        begin
          XML::Parser.string(xml).parse
        rescue XML::Error => e
          raise OAI::Exception, "response not well formed XML: #{e.message}", caller
        end
      when 'rexml'
        begin
          REXML::Document.new(xml)
        rescue REXML::ParseException => e
          raise OAI::Exception, "response not well formed XML: #{e.message}", caller
        end
      end
    end

    # Do the actual HTTP get; returns the response body.
    def get(uri)
      response = @http_client.get do |req|
        req.url uri
        req.headers.merge! @headers
      end

      response.body
    end

    def debug(msg)
      $stderr.print("#{msg}\n") if @debug
    end

    # Massage the standard OAI options to make them a bit more palatable.
    # Returns a new hash keyed by underscored symbols; the caller's hash is
    # left untouched. Raises OAI::VerbException for an unknown verb and
    # OAI::ArgumentException for arguments the verb does not accept.
    def validate_options(verb, opts = {})
      raise OAI::VerbException.new unless Const::VERBS.key?(verb)

      return {} if opts.nil?

      raise OAI::ArgumentException.new unless opts.respond_to?(:keys)

      # Internalize the hash (metadataPrefix -> :metadata_prefix)
      realopts = {}
      opts.keys.each do |key|
        realopts[key.to_s.gsub(/([A-Z])/, '_\1').downcase.intern] = opts[key]
      end

      return realopts if is_resumption?(realopts)

      # add in a default metadataPrefix if none exists
      if Const::VERBS[verb].include?(:metadata_prefix)
        realopts[:metadata_prefix] ||= 'oai_dc'
      end

      # check for any bad options
      unless (realopts.keys - OAI::Const::VERBS[verb]).empty?
        raise OAI::ArgumentException.new
      end
      realopts
    end

    # True when +opts+ consists of a resumption token only. A resumption
    # token combined with any other argument raises OAI::ArgumentException.
    def is_resumption?(opts)
      return false unless opts.key?(:resumption_token)
      return true if opts.size == 1

      raise OAI::ArgumentException.new
    end

    # Convert our internal representations back into standard OAI options
    # (:metadata_prefix -> "metadataPrefix").
    def externalize(value)
      value.to_s.gsub(/_[a-z]/) { |m| m.sub("_", '').capitalize }
    end

    def parse_date(value)
      return value if value.respond_to?(:strftime)

      Date.parse(value) # This will raise an exception for badly formatted dates
      Time.parse(value).utc # Sadly, this will not
    rescue StandardError
      raise OAI::ArgumentException, "unparsable date: '#{value}'"
    end

    # Replaces invalid UTF-8 byte sequences with `?`. Regex from the W3C,
    # inverted: http://www.w3.org/International/questions/qa-forms-utf-8.en.php
    #
    # Regex is from WebCollab:
    # http://webcollab.sourceforge.net/unicode.html
    #
    # The work is done on a BINARY copy so the caller's string keeps its
    # encoding (and may be frozen); the result is tagged with the original
    # encoding again. The `n` flag keeps the byte-oriented patterns valid
    # whatever the encoding of this source file is.
    def strip_invalid_utf_8_chars(xml)
      return nil unless xml

      orig_encoding = xml.encoding
      cleaned = xml.b.gsub(/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]
                            | [\x00-\x7F][\x80-\xBF]+
                            | ([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*
                            | [\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})
                            | [\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))
                            | (?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/xn, '?')
      # Overlong three-byte forms and UTF-16 surrogates. This pattern is
      # written across lines, so it needs the extended flag to match at all.
      cleaned = cleaned.gsub(/\xE0[\x80-\x9F][\x80-\xBF]
                              | \xED[\xA0-\xBF][\x80-\xBF]/xn, '?')

      cleaned.force_encoding(orig_encoding)
    end

    # Escapes every ampersand that is not already part of an entity or
    # character reference, so the parser does not choke on it.
    def strip_invalid_xml_chars(xml)
      return xml if xml.nil?

      xml.gsub(UNESCAPED_AMPERSAND, '&amp;')
    end
  end
end
module OAI
  # The header part of a record: its status attribute, identifier,
  # datestamp and set specs, as read from a header element through the
  # OAI::XPath helpers (defined elsewhere in the library).
  class Header
    include OAI::XPath
    attr_accessor :status, :identifier, :datestamp, :set_spec

    # element:: the parsed header node to read from
    def initialize(element)
      @status = get_attribute(element, 'status')
      @identifier = xpath(element, './/identifier')
      @datestamp = xpath(element, './/datestamp')
      @set_spec = xpath_all(element, './/setSpec')
    end

    # True when the status attribute is "deleted", false otherwise.
    def deleted?
      @status.to_s == "deleted"
    end
  end

  # Wraps the response to an `Identify` request and exposes the repository
  # description fields as attributes.
  class IdentifyResponse < Response
    include OAI::XPath
    attr_accessor :repository_name, :base_url, :protocol, :admin_email,
      :earliest_datestamp, :deleted_record, :granularity, :compression

    # doc:: the parsed response document
    def initialize(doc)
      super doc
      @repository_name = xpath(doc, './/Identify/repositoryName')
      @base_url = xpath(doc, './/Identify/baseURL')
      @protocol = xpath(doc, './/Identify/protocol')
      @admin_email = xpath(doc, './/Identify/adminEmail')
      @earliest_datestamp = xpath(doc, './/Identify/earliestDatestamp')
      @deleted_record = xpath(doc, './/Identify/deletedRecord')
      @granularity = xpath(doc, './/Identify/granularity')
      # Same descendant lookup as every other field (was '..//', which
      # starts from the parent of the document instead of the document).
      @compression = xpath(doc, './/Identify/compression')
    end

    def to_s
      "#{@repository_name} [#{@base_url}]"
    end

    # returns REXML::Element nodes for each description section
    # if the OAI::Client was configured to use libxml then you will
    # instead get a LibXML::XML::Node object.
    def descriptions
      xpath_all(doc, './/Identify/description')
    end
  end

  # Response to `ListIdentifiers`; enumerates OAI::Header objects.
  class ListIdentifiersResponse < Response
    include Enumerable
    include OAI::Resumable
    include OAI::XPath

    # Yields an OAI::Header for every header element of this page.
    # Returns an Enumerator when called without a block.
    def each
      return enum_for(:each) unless block_given?

      xpath_all(@doc, './/ListIdentifiers/header').each do |header_element|
        yield OAI::Header.new(header_element)
      end
    end
  end

  # Response to `ListMetadataFormats`; enumerates MetadataFormat objects.
  class ListMetadataFormatsResponse < Response
    include Enumerable
    include OAI::XPath

    # Yields a MetadataFormat for every metadataFormat element.
    # Returns an Enumerator when called without a block.
    def each
      return enum_for(:each) unless block_given?

      xpath_all(@doc, './/metadataFormat').each do |format|
        yield MetadataFormat.new(format)
      end
    end
  end

  # allows for iteration across a list of records
  #
  #   client.list_records(:metadata_prefix => 'oai_dc').each do |record|
  #     puts record.metadata
  #   end
  #
  # you'll need to handle resumption tokens
  class ListRecordsResponse < Response
    include Enumerable
    include OAI::Resumable
    include OAI::XPath

    # Yields an OAI::Record for every record element of this page.
    # Returns an Enumerator when called without a block.
    def each
      return enum_for(:each) unless block_given?

      xpath_all(@doc, './/ListRecords/record').each do |record_element|
        yield OAI::Record.new(record_element)
      end
    end
  end
end
module OAI

  # Iterates over the sets advertised by an OAI-PMH server:
  #
  #   for set in client.list_sets
  #     puts set
  #   end
  class ListSetsResponse < Response
    include Enumerable
    include OAI::Resumable
    include OAI::XPath

    # Yields the result of OAI::Set.parse for each set element.
    def each
      xpath_all(@doc, './/set').each do |set_element|
        yield OAI::Set.parse(set_element)
      end
    end
  end

  # One metadata format entry: its prefix, schema and namespace, each read
  # from the given element through the OAI::XPath helpers.
  class MetadataFormat
    include OAI::XPath
    attr_accessor :prefix, :schema, :namespace

    def initialize(element)
      @prefix = xpath(element, './/metadataPrefix')
      @schema = xpath(element, './/schema')
      @namespace = xpath(element, './/metadataNamespace')
    end
  end

  # Represents a record as returned from a `GetRecord` or `ListRecords`
  # request. A record exposes a header (an {OAI::Header}), the metadata and
  # about nodes, and the element it was built from (`_source`).
  #
  # Note: the node objects come from the parser the {OAI::Client} was
  # configured with (`REXML::Element` for rexml, `XML::Node` for libxml).
  class Record
    include OAI::XPath
    attr_accessor :header, :metadata, :about, :_source

    def initialize(element)
      @_source = element
      header_node = xpath_first(element, './/header')
      @header = OAI::Header.new(header_node)
      @metadata = xpath_first(element, './/metadata')
      @about = xpath_first(element, './/about')
    end

    # Convenience delegate to the header: true when its status is 'deleted'.
    def deleted?
      @header.deleted?
    end
  end

  # An OAI::Response holds the parsed document and its resumption token.
  # When a token is present, use it to fetch the rest of the entries:
  #
  # ```ruby
  # # List all records in a given set
  # client = OAI::Client.new 'http://my-oai-provider.example.com/oai'
  # response = client.list_records :set => 'my_set_name'
  # loop do
  #   response.entries.each { |entry| puts entry.header.identifier }
  #   token = response.resumption_token
  #   break if token.nil? || token.empty?
  #   # Only the resumption token is passed, not the original options
  #   response = client.list_records :resumption_token => token
  # end
  # ```
  class Response
    include OAI::XPath
    attr_reader :doc, :resumption_token, :resumption_block

    # Stores the document, its resumption token and the optional block used
    # to fetch following pages. Raises the exception built by
    # OAI::Exception.for when the document contains an error element.
    def initialize(doc, &resumption_block)
      @doc = doc
      @resumption_token = xpath(doc, './/resumptionToken')
      @resumption_block = resumption_block

      error = xpath_first(doc, './/error')
      return if error.nil? || error == false

      message, code = extract_error(error)
      raise OAI::Exception.for(message: message, code: code)
    end

    private

    # Returns [message, code] for an error node. The node class is compared
    # by name because only one of the two parsers is loaded.
    def extract_error(error)
      node_type = error.class.to_s
      if node_type == 'REXML::Element'
        [error.text, error.attributes['code']]
      elsif node_type == 'LibXML::XML::Node'
        message = error.content
        code =
          if defined?(error.property).nil?
            error.attributes['code']
          else
            begin
              error["code"]
            rescue
              error.property('code')
            end
          end
        [message, code]
      else
        [nil, nil]
      end
    end

  end

  # Mixin giving a response `#full`, which iterates over every page of a
  # resumable list.
  module Resumable

    # Enumerates a response and, while a non-empty resumption token is
    # present, the responses produced by calling the resumption block.
    class ResumptionWrapper
      include Enumerable

      def initialize(response)
        @response = response
        @resumption_block = response.resumption_block
      end

      def each(&block)
        yield_from_response(&block)
        yield_from_response(&block) while next_page
      end

      private

      # Replaces the current response with the following page; false when
      # there is nothing left to fetch.
      def next_page
        return false unless resumable?

        @response = @resumption_block.call(@response)
        true
      end

      def yield_from_response(&block)
        @response.each { |obj| block.call(obj) }
      end

      def resumable?
        token = @response.resumption_token
        token && !token.empty?
      end

    end

    # Returns a ResumptionWrapper over this response. Raises
    # NotImplementedError when no resumption block was provided.
    def full
      raise NotImplementedError.new("Resumption block not provided") if @resumption_block.nil?

      ResumptionWrapper.new(self)
    end

  end
end
module OAI

  # Standard error responses for problems serving OAI content.  These
  # messages will be wrapped in an XML response to the client.
  #
  # Each subclass declares the OAI-PMH error code it stands for (CODE), a
  # default human readable message (MESSAGE), and registers itself so that
  # Exception.for can map a code back to the matching class.
  class Exception < RuntimeError
    CODE = nil
    MESSAGE = nil

    # The OAI-PMH error code carried by this exception (may be nil).
    attr_reader :code

    # code (String) => exception class.  Kept as a class variable on purpose:
    # register_exception_code is invoked with a subclass as receiver and all
    # of them must write into the same table.
    @@codes = {}

    # Registers exception_class as the class to raise for code.  Only direct
    # subclasses of OAI::Exception are accepted; anything else is ignored.
    def self.register_exception_code(code, exception_class)
      @@codes[code] = exception_class if exception_class.superclass == OAI::Exception
    end

    # Builds the exception registered for code, falling back to a plain
    # OAI::Exception for codes nobody registered.  The code is passed on so
    # that an unregistered code is still available through #code instead of
    # being dropped.
    def self.for(message: nil, code: nil)
      @@codes.fetch(code, Exception).new(message, code)
    end

    # message - text for the error; defaults to the class' MESSAGE.
    # code    - error code; defaults to the class' CODE.
    def initialize(message = nil, code = nil)
      super(message || self.class::MESSAGE)
      @code = code || self.class::CODE
    end
  end

  class ArgumentException < Exception
    CODE = 'badArgument'
    MESSAGE = 'The request includes ' \
      'illegal arguments, is missing required arguments, includes a ' \
      'repeated argument, or values for arguments have an illegal syntax.'
    register_exception_code(CODE, self)
  end

  class VerbException < Exception
    CODE = 'badVerb'
    MESSAGE = 'Value of the verb argument is not a legal OAI-PMH '\
      'verb, the verb argument is missing, or the verb argument is repeated.'
    register_exception_code(CODE, self)
  end

  class FormatException < Exception
    CODE = 'cannotDisseminateFormat'
    MESSAGE = 'The metadata format identified by '\
      'the value given for the metadataPrefix argument is not supported '\
      'by the item or by the repository.'
    register_exception_code(CODE, self)
  end

  class IdException < Exception
    CODE = 'idDoesNotExist'
    MESSAGE = 'The value of the identifier argument is '\
      'unknown or illegal in this repository.'
    register_exception_code(CODE, self)
  end

  class NoMatchException < Exception
    CODE = 'noRecordsMatch'
    MESSAGE = 'The combination of the values of the from, '\
      'until, set and metadataPrefix arguments results in an empty list.'
    register_exception_code(CODE, self)
  end

  class MetadataFormatException < Exception
    CODE = 'noMetadataFormats'
    MESSAGE = 'There are no metadata formats available '\
      'for the specified item.'
    register_exception_code(CODE, self)
  end

  class SetException < Exception
    CODE = 'noSetHierarchy'
    MESSAGE = 'This repository does not support sets.'
    register_exception_code(CODE, self)
  end

  class ResumptionTokenException < Exception
    CODE = 'badResumptionToken'
    MESSAGE = 'The value of the resumptionToken argument is invalid or expired.'
    register_exception_code(CODE, self)
  end
end
# Harvester configuration backed by a YAML file.  Settings are plain
# OpenStruct attributes (storage, logfile, email, mail_server, sites, ...).
class Config < OpenStruct

  PERIODS = %w(daily weekly monthly)
  GLOBAL = "/etc/oai/harvester.yml"

  # Loads the configuration file located by find_config.  Returns an empty
  # Config when no location can be determined or the file does not exist.
  #
  # The file is read with safe_load and an explicit class list because the
  # stored data contains Symbol keys and Time values ('last' per site), which
  # YAML.load_file refuses under Psych 4.
  def self.load
    config = find_config
    return new unless config && File.exist?(config)

    new(YAML.safe_load(File.read(config),
                       permitted_classes: [Symbol, Time, Date],
                       aliases: true))
  end

  # Writes all current settings to the file located by find_config.
  def save
    config = Config.find_config
    File.open(config, 'w') do |out|
      YAML.dump(to_h, out)
    end
  end

  # Shamelessly lifted from Camping
  #
  # Returns the path of the configuration file:
  # * GLOBAL when HOME is set and GLOBAL exists and is writable,
  # * otherwise ~/.oai/harvester.yml (the directory is created),
  # * %APPDATA%/oai/harvester.yml when only APPDATA is set,
  # * nil when neither environment variable is set.
  #
  # This stays public: #save calls it with an explicit receiver.
  def self.find_config
    if (home = ENV['HOME']) # POSIX
      return GLOBAL if File.exist?(GLOBAL) && File.writable?(GLOBAL)
      FileUtils.mkdir_p File.join(home, '.oai')
      File.join(home, '.oai/harvester.yml')
    elsif (home = ENV['APPDATA']) # MSWIN
      File.join(home, 'oai/harvester.yml')
    end
  end

end
module OAI
  module Harvester
    # Downloads the records of configured provider sites into gzipped files
    # and moves them into a date based directory layout below the storage
    # directory.
    class Harvest
      DIRECTORY_LAYOUT = "%Y/%m".freeze

      # config    - harvester configuration; loaded via Config.load when nil.
      # directory - storage directory; defaults to config.storage.
      # date      - optional lower bound ("from") for the harvest.
      # to        - optional upper bound ("until") for the harvest.
      # Note that the date objects passed in are frozen.
      def initialize(config = nil, directory = nil, date = nil, to = nil)
        @config = config || Config.load
        @directory = directory || @config.storage
        @from = date
        @from.freeze
        @until = to
        @until.freeze
        @parser = defined?(XML::Document) ? 'libxml' : 'rexml'
      end

      # Harvests the given sites and always saves the configuration afterwards.
      #
      # sites       - a single site name, a list of site names, or nil for
      #               every configured site.  (A single name used to fail
      #               because String does not respond to #each.)
      # interactive - when true, progress and OAI errors are printed instead
      #               of raised.
      def start(sites = nil, interactive = false)
        @interactive = interactive
        if sites.nil?
          configured = @config.sites
          sites = configured.respond_to?(:keys) ? configured.keys : []
        end
        sites = Array(sites)
        begin
          sites.each do |site|
            harvest(site)
          end
        ensure
          @config.save
        end
      end

      private

      # Harvests one configured site and records the harvest time as the
      # site's 'last' value on success.
      def harvest(site)
        opts = build_options_hash(@config.sites[site])
        if @until
          harvest_time = @until.to_time.utc
        else
          harvest_time = Time.now.utc
        end

        # Date arguments are formatted according to the granularity the
        # repository reports.
        # NOTE(review): a :from taken from site['last'] stays in seconds
        # format even for day-granularity repositories - confirm the client
        # copes with that.
        if OAI::Const::Granularity::LOW == granularity(opts[:url])
          opts[:until] = harvest_time.strftime("%Y-%m-%d")
          opts[:from] = @from.strftime("%Y-%m-%d") if @from
        else
          opts[:until] = harvest_time.xmlschema
          opts[:from] = @from.xmlschema if @from
        end

        # Allow a from date to be passed in
        opts[:from] = earliest(opts[:url]) unless opts[:from]
        opts.delete(:set) if 'all' == opts[:set]
        begin
          # Connect, and download
          file, records = call(opts.delete(:url), opts)

          # Move document to storage directory if configured
          if @directory
            directory_layout = @config.layouts[site] if @config.layouts
            dir = File.join(@directory, date_based_directory(harvest_time, directory_layout))
            FileUtils.mkdir_p dir
            FileUtils.mv(file.path,
              File.join(dir, "#{site}-#{filename(Time.parse(opts[:from]),
              harvest_time)}.xml.gz"))
          else
            puts "no configured destination for temp file" if @interactive
          end
          @config.sites[site]['last'] = harvest_time
        rescue OAI::NoMatchException
          puts "No new records available" if @interactive
        rescue OAI::Exception => ex
          raise ex if not @interactive
          puts ex.message
        end
      end

      # Downloads all records for url/opts into a gzipped temp file.
      # Returns [tempfile, number_of_records].
      def call(url, opts)
        # Preserve original options
        options = opts.dup

        records = 0
        client = OAI::Client.new(url, :parser => @parser)
        # The result is not used; the request is kept as the original code
        # issues it before downloading.
        client.identify

        file = Tempfile.new('oai_data')
        gz = Zlib::GzipWriter.new(file)
        # NOTE(review): the literals written before and after the records
        # are reproduced exactly as they appear in this source - confirm
        # they are complete.
        gz << "\n"
        gz << ""
        begin
          response = client.list_records(options)
          response.each do |rec|
            gz << rec._source
            records += 1
          end
          puts "#{records} records retrieved" if @interactive

          # Get a full response by iterating with the resumption tokens.
          # Not very Ruby like.  Should fix OAI::Client to handle resumption
          # tokens internally.
          while(response.resumption_token and not response.resumption_token.empty?)
            puts "\nresumption token recieved, continuing" if @interactive
            response = client.list_records(:resumption_token =>
              response.resumption_token)
            response.each do |rec|
              gz << rec._source
              records += 1
            end
            puts "#{records} records retrieved" if @interactive
          end

          gz << ""

        ensure
          gz.close
          file.close
        end

        [file, records]
      end

      # Returns the record nodes of a parsed ListRecords document.
      def get_records(doc)
        doc.find("/OAI-PMH/ListRecords/record").to_a
      end

      # Translates a site's configuration hash into client options.
      def build_options_hash(site)
        options = {:url => site['url']}
        options[:set] = site['set'] if site['set']
        options[:from] = site['last'].utc.xmlschema if site['last']
        options[:metadata_prefix] = site['prefix'] if site['prefix']
        options
      end

      # Formats time with the site's layout, or DIRECTORY_LAYOUT when none
      # is given.
      def date_based_directory(time, directory_layout = nil)
        directory_layout ||= Harvest::DIRECTORY_LAYOUT
        "#{time.strftime(directory_layout)}"
      end

      # Builds the "<from>_til_<until>_at_<time>" part of the file name.
      def filename(from_time, until_time)
        format = "%Y-%m-%d"
        "#{from_time.strftime(format)}_til_#{until_time.strftime(format)}"\
        "_at_#{until_time.strftime('%H-%M-%S')}"
      end

      # Asks the repository at url for its datestamp granularity.
      def granularity(url)
        client = OAI::Client.new url
        client.identify.granularity
      end

      # Get earliest timestamp from repository
      def earliest(url)
        client = OAI::Client.new url
        identify = client.identify
        if OAI::Const::Granularity::LOW == identify.granularity
          Time.parse(identify.earliest_datestamp).strftime("%Y-%m-%d")
        else
          Time.parse(identify.earliest_datestamp).xmlschema
        end
      end

    end

  end
end
# Logging wrapper around the original harvest: records the attempt in the
# summary, delegates to orig_harvest, and turns failures into log entries
# and summary lines instead of letting them propagate.
def harvest(site)
  @logger.info { "Harvest of '#{site}' starting" }
  @summary << "Harvest of '#{site}' attempted"
  orig_harvest(site)
rescue OAI::Exception => e
  if "noRecordsMatch" == e.code
    @logger.info "No new records available"
    @summary << "'#{site}' had no new records."
  else
    @logger.error { "Harvesting of '#{site}' failed, message: #{e}" }
    @summary << "'#{site}' had an OAI Error! #{e}"
  end
rescue => e
  @logger.error { "Harvesting of '#{site}' failed, message: #{e}" }
  # Double quotes matter here: '\n' would join the frames with a literal
  # backslash-n instead of a line break.
  @logger.error { Array(e.backtrace).join("\n") }
  @summary << "'#{site}' had an Error! #{e}"
end
# Sends the harvest summary by mail.
class Mailer

  # server  - SMTP server address handed to Net::SMTP.start.
  # email   - recipient address or list of addresses.
  # message - summary line or list of summary lines.
  #
  # Each recipient is passed to the SMTP session as its own address.  The
  # previous implementation wrapped every address in single quotes and
  # joined them into one string, which is not a valid recipient.
  # Nothing is sent when there is no recipient.
  def self.send(server = nil, email = nil, message = nil)
    recipients = Array(email)
    return if recipients.empty?

    msg = %{Subject: Harvester Summary\n\n#{Array(message).join("\n")}}
    Net::SMTP.start(server) do |smtp|
      smtp.send_message msg, "harvester@#{Socket.gethostname}", *recipients
    end
  end

end
23 | # 24 | class Shell 25 | include Readline 26 | 27 | def initialize(config) 28 | @conf = config 29 | @conf.sites ||= {} # Initialize sites hash there isn't one 30 | end 31 | 32 | def start 33 | unless @conf.storage 34 | banner "Entering first-time setup" 35 | config 36 | setup_cron 37 | end 38 | puts "type 'help' for help" 39 | while((input = readline("oai> ", true)) != 'exit') 40 | begin 41 | cmd = input.split 42 | if 1 == cmd.size 43 | self.send(cmd[0]) 44 | else 45 | self.send(cmd.shift, cmd.join(" ")) 46 | end 47 | rescue NoMethodError 48 | puts "Not a recognized command. Type 'help' for clues." 49 | rescue 50 | puts "An error occurred:" 51 | puts $! 52 | puts $!.backtrace.join("\n") 53 | end 54 | end 55 | end 56 | 57 | private 58 | 59 | def help 60 | banner "Commands:" 61 | puts "\tharvest site [date] - Harvest site(s) manually" 62 | puts "\tconfig - Configure harvester" 63 | puts "\tlist - List known providers or configuration" 64 | puts "\tinfo [site[, site]] - Show information about a provider." 65 | puts "\tnew - Add a new provider site to harvester" 66 | puts "\tremove [site] - Remove a provider site from harvester" 67 | puts "\tedit [site] - Change settings for a provider site" 68 | puts "\texit - Exit the harvester shell.\n\n" 69 | end 70 | 71 | def harvest(options) 72 | site, *date = options.split(/\s/) 73 | if @conf.sites.keys.include?(site) 74 | banner "Harvesting '#{site}'" 75 | if date && !date.empty? 
76 | begin 77 | date = Time.parse(date.join(' ')).utc 78 | rescue NoMethodError 79 | puts "Couldn't parse the date supplied" 80 | return 81 | end 82 | else 83 | date = nil 84 | end 85 | harvester = Harvest.new(@conf, @conf.storage, date) 86 | harvester.start(site, true) 87 | puts "done" 88 | else 89 | puts "Unknown repository: '#{args[0]}'" 90 | end 91 | puts # blank line 92 | end 93 | 94 | def list(args = nil) 95 | if 'config' == args 96 | banner "Current Configuration" 97 | list_config 98 | else 99 | banner "Configured Repositories" 100 | @conf.sites.keys.each do |k| 101 | puts k 102 | end 103 | end 104 | puts # blank line 105 | end 106 | 107 | def info(args) 108 | banner "Provider Site Information" 109 | sites = args.split(/[,\s|\s|,]/) 110 | sites.each do |site| 111 | print_site(site) 112 | end 113 | puts 114 | rescue 115 | puts args + " doesn't appear to be configured, use list to see configured repositories." 116 | end 117 | 118 | def new 119 | banner "Define New Harvesting Site" 120 | name, site = form 121 | @conf.sites[name] = site 122 | @conf.save 123 | end 124 | 125 | def edit(name) 126 | banner "Edit Harvesting Site" 127 | name, site = form(name) 128 | @conf.sites[name] = site 129 | @conf.save 130 | end 131 | 132 | def remove(site) 133 | if 'Y' == readline("Remove #{site}? (Y/N): ").upcase 134 | @conf.sites.delete(site) 135 | @conf.save 136 | puts "#{site} removed" 137 | end 138 | end 139 | 140 | # http://oai.getty.edu:80/oaicat/OAIHandler 141 | def form(name = nil) 142 | begin 143 | if not name 144 | name = prompt("nickname", nil) 145 | while(@conf.sites.keys.include?(name)) 146 | show 0, "Nickname already in use, choose another." 147 | name = prompt("nickname") 148 | end 149 | end 150 | site = @conf.sites[name] || {} 151 | 152 | # URL 153 | url = prompt("url", site['url']) 154 | while(not (site['url'] = verify(url))) 155 | puts "Trouble contacting provider, bad url?" 
156 | url = prompt("url", site['url']) 157 | end 158 | 159 | # Metadata formats 160 | formats = metadata(site['url']) 161 | report "Repository supports [#{formats.join(', ')}] metadata formats." 162 | prefix = prompt("prefix", site['prefix']) 163 | while(not formats.include?(prefix)) 164 | prefix = prompt("prefix", site['prefix']) 165 | end 166 | site['prefix'] = prefix 167 | 168 | # Sets 169 | sets = ['all'] 170 | begin 171 | sets.concat sets(site['url']) 172 | site['set'] = 'all' unless site['set'] # default to all sets 173 | report "Repository supports [#{sets.join(', ')}] metadata sets." 174 | set = prompt("set", site['set']) 175 | while(not sets.include?(site['set'])) 176 | set = prompt("set", site['set']) 177 | end 178 | site['set'] = set 179 | rescue 180 | site['set'] = 'all' 181 | end 182 | 183 | # Period 184 | period = expand_period(prompt("period", "daily")) 185 | while(not Config::PERIODS.include?(period)) 186 | puts "Must be daily, weekly, or monthly" 187 | period = expand_period(prompt("period", "daily")) 188 | end 189 | 190 | site['period'] = period 191 | 192 | return [name, site] 193 | rescue 194 | puts "Problem adding/updating provider, aborting. 
(#{$!})" 195 | end 196 | end 197 | 198 | def config 199 | begin 200 | directory = prompt("storage directory", @conf.storage) 201 | while not directory_acceptable(directory) 202 | directory = prompt("storage directory: ", @conf.storage) 203 | end 204 | 205 | email = @conf.email.join(', ') rescue nil 206 | @conf.email = parse_emails(prompt("email", email)) 207 | 208 | @conf.mail_server = prompt("mail server", @conf.mail_server) 209 | 210 | logfile = prompt("log file(s) directory", @conf.logfile) 211 | while not directory_acceptable(logfile) 212 | logfile = prompt("log file(s) directory", @conf.logfile) 213 | end 214 | @conf.storage = directory 215 | @conf.logfile = logfile 216 | @conf.save 217 | rescue 218 | nil 219 | end 220 | end 221 | 222 | def display(key, value, split = 40) 223 | (split - key.size).times { print " " } if key.size < split 224 | puts "#{key}: #{value}" 225 | end 226 | 227 | def banner(str) 228 | puts "\n#{str}" 229 | str.size.times { print "-" } 230 | puts "\n" 231 | end 232 | 233 | def report(str) 234 | puts "\n#{str}\n" 235 | end 236 | 237 | def indent(number) 238 | number.times do 239 | print "\t" 240 | end 241 | end 242 | 243 | def prompt(text, default = nil, split = 20) 244 | prompt_text = "#{text} [#{default}]: " 245 | (split - prompt_text.size).times { print " " } if prompt_text.size < split 246 | value = readline(prompt_text, true) 247 | raise RuntimeError.new("Exit loop") unless value 248 | return value.empty? ? 
default : value 249 | end 250 | 251 | def verify(url) 252 | begin 253 | client = OAI::Client.new(url, :redirects => false) 254 | identify = client.identify 255 | puts "Repository name \"#{identify.repository_name}\"" 256 | return url 257 | rescue 258 | if $!.to_s =~ /^Permanently Redirected to \[(.*)\?.*\]/ 259 | report "Provider redirected to: #{$1}" 260 | verify($1) 261 | else 262 | puts "Error selecting repository: #{$!}" 263 | end 264 | end 265 | end 266 | 267 | def metadata(url) 268 | formats = [] 269 | client = OAI::Client.new url 270 | response = client.list_metadata_formats 271 | response.to_a.each do |format| 272 | formats << format.prefix 273 | end 274 | formats 275 | end 276 | 277 | def sets(url) 278 | sets = [] 279 | client = OAI::Client.new url 280 | response = client.list_sets 281 | response.to_a.each do |set| 282 | sets << set.spec 283 | end 284 | sets 285 | end 286 | 287 | def directory_acceptable(dir) 288 | if not (dir && File.exists?(dir) && File.writable?(dir)) 289 | puts "Directory doesn't exist, or isn't writtable." 
290 | return false 291 | end 292 | true 293 | end 294 | 295 | def expand_period(str) 296 | return str if Config::PERIODS.include?(str) 297 | Config::PERIODS.each { |p| return p if p =~ /^#{str}/} 298 | nil 299 | end 300 | 301 | def parse_emails(emails) 302 | return nil unless emails 303 | addresses = emails.split(/[,\s|\s|,]/) 304 | end 305 | 306 | def list_config 307 | display("storage directory", @conf.storage, 20) 308 | display("email", @conf.email.join(', '), 20) if @conf.email 309 | display("mail server", @conf.mail_server, 20) if @conf.mail_server 310 | display("log location", @conf.logfile, 20) if @conf.logfile 311 | end 312 | 313 | def list_sites 314 | banner "Sites" 315 | @conf.sites.each_key { |site| print_site(site) } 316 | end 317 | 318 | def print_site(site) 319 | puts site 320 | @conf.sites[site].each { |k,v| display(k, v, 15)} 321 | end 322 | 323 | def setup_cron 324 | banner "Scheduling Automatic Harvesting" 325 | puts "To activate automatic harvesting you must add an entry to" 326 | puts "your scheduler. Linux/Mac OS X users should add the following" 327 | puts "entry to their crontabs:\n\n" 328 | puts "0 0 * * * #{$0} -D\n\n" 329 | puts "Windows users should use WinAt to schedule" 330 | puts "#{$0} to run every night.\n\n\n" 331 | end 332 | 333 | end 334 | 335 | end 336 | end 337 | 338 | -------------------------------------------------------------------------------- /lib/oai/provider/metadata_format.rb: -------------------------------------------------------------------------------- 1 | require 'singleton' 2 | 3 | module OAI::Provider::Metadata 4 | # == Metadata Base Class 5 | # 6 | # MetadataFormat is the base class from which all other format classes 7 | # should inherit. Format classes provide mapping of record fields into XML. 
# == Metadata Base Class
#
# Format is the base class from which all other format classes should
# inherit.  Format classes provide mapping of record fields into XML.
#
# * prefix - contains the metadata_prefix used to select the format
# * schema - location of the xml schema
# * namespace - location of the namespace document
# * element_namespace - the namespace portion of the XML elements
# * fields - list of fields in this metadata format
#
class Format
  include Singleton

  attr_accessor :prefix, :schema, :namespace, :element_namespace, :fields

  # Provided a model, and a record belonging to that model this method
  # will return an xml represention of the record.  This is the method
  # that should be extended if you need to create more complex xml
  # representations.
  #
  # A record that responds to "to_<prefix>" supplies its own XML; otherwise
  # one element per field value is generated, using the model's optional
  # "map_<prefix>" field mapping.
  def encode(model, record)
    if record.respond_to?("to_#{prefix}")
      record.send("to_#{prefix}")
    else
      xml = Builder::XmlMarkup.new
      map = model.respond_to?("map_#{prefix}") ? model.send("map_#{prefix}") : {}
      xml.tag!("#{prefix}:#{element_namespace}", header_specification) do
        fields.each do |field|
          values = value_for(field, record, map)
          if values.respond_to?(:each)
            values.each do |value|
              xml.tag! "#{element_namespace}:#{field}", value
            end
          else
            xml.tag! "#{element_namespace}:#{field}", values
          end
        end
      end
      xml.target!
    end
  end

  private

  # We try a bunch of different methods to get the data from the model.
  #
  # 1. Check if the model defines a field mapping for the field of
  #    interest.
  # 2. Try calling the pluralized name method on the model.
  # 3. Try calling the singular name method on the model
  #
  # Returns an empty list when the record offers neither.
  def value_for(field, record, map)
    method = map[field] ? map[field].to_s : field.to_s

    if record.respond_to?(pluralize(method))
      record.send pluralize(method)
    elsif method != 'type' and record.respond_to?(method)
      # 'type' is skipped on purpose: calling it on a record is deprecated.
      record.send method
    else
      []
    end
  end

  # Subclasses must override
  def header_specification
    raise NotImplementedError.new
  end

  # Shamelessly lifted form ActiveSupport.  Thanks Rails community!
  def pluralize(word)
    # Use ActiveSupports pluralization if it's available.
    return word.pluralize if word.respond_to?(:pluralize)

    # Otherwise use our own simple pluralization rules.
    result = word.to_s.dup

    # Uncountable words
    return result if %w(equipment information rice money species series fish sheep).include?(result)

    # Irregular words
    { 'person' => 'people', 'man' => 'men', 'child' => 'children', 'sex' => 'sexes',
      'move' => 'moves', 'cow' => 'kine' }.each { |k,v| return v if result == k }

    # The rule list is written in ActiveSupport's declaration order, where
    # later rules take precedence.  It must therefore be walked backwards;
    # walking it forwards lets the catch-all /$/ rule win every time
    # ("rights" became "rightss", "status" became "statuss").
    rules.reverse_each { |(rule, replacement)| break if result.gsub!(rule, replacement) }
    result
  end

  # Pluralization rules, most general first.
  def rules
    [
      [/$/, 's'],
      [/s$/i, 's'],
      [/(ax|test)is$/i, '\1es'],
      [/(octop|vir)us$/i, '\1i'],
      [/(alias|status)$/i, '\1es'],
      [/(bu)s$/i, '\1ses'],
      [/(buffal|tomat)o$/i, '\1oes'],
      [/([ti])um$/i, '\1a'],
      [/sis$/i, 'ses'],
      [/(?:([^f])fe|([lr])f)$/i, '\1\2ves'],
      [/(hive)$/i, '\1s'],
      [/([^aeiouy]|qu)y$/i, '\1ies'],
      [/(x|ch|ss|sh)$/i, '\1es'],
      [/(matr|vert|ind)(?:ix|ex)$/i, '\1ices'],
      [/([m|l])ouse$/i, '\1ice'],
      [/^(ox)$/i, '\1en'],
      [/(quiz)$/i, '\1zes']
    ]
  end

end
# Simple implementation of the Dublin Core (oai_dc) metadata format.
class DublinCore < Format

  # Sets the prefix, schema, namespace, element namespace and the fifteen
  # Dublin Core element names for this format.
  def initialize
    @prefix = 'oai_dc'
    @schema = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
    @namespace = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
    @element_namespace = 'dc'
    @fields = %i[
      title creator subject description publisher
      contributor date type format identifier
      source language relation coverage rights
    ]
  end

  # Attributes placed on the root element of an encoded record: the
  # namespace declarations plus the schema location pair.
  def header_specification
    schema_location = [
      'http://www.openarchives.org/OAI/2.0/oai_dc/',
      'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
    ].join(' ')

    {
      'xmlns:oai_dc' => "http://www.openarchives.org/OAI/2.0/oai_dc/",
      'xmlns:dc' => "http://purl.org/dc/elements/1.1/",
      'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
      'xsi:schemaLocation' => schema_location
    }
  end

end
# Base class for provider models.  It stores the paging limit and the
# names of the timestamp and identifier fields; the query methods are
# placeholders that subclasses override.
class Model
  attr_reader :timestamp_field, :identifier_field, :limit

  # limit            - page size, or nil.
  # timestamp_field  - name of the update timestamp field.
  # identifier_field - name of the identifier field.
  def initialize(limit = nil, timestamp_field = 'updated_at', identifier_field = 'id')
    @limit, @identifier_field, @timestamp_field = limit, identifier_field, timestamp_field
  end

  # Should return the earliest timestamp available from this model.
  def earliest
    raise NotImplementedError
  end

  # Should return the latest timestamp available from this model.
  def latest
    raise NotImplementedError
  end

  # No sets unless a subclass provides them.
  def sets
    nil
  end

  # find is the core method of a model: it returns records from the model
  # based on the parameters passed in.
  #
  # selector can be a singular id, or the symbol :all
  # options is a hash of options to be used to constrain the query.
  #
  # Valid options:
  # * :from => earliest timestamp to be included in the results
  # * :until => latest timestamp to be included in the results
  # * :set => the set from which to retrieve the results
  # * :metadata_prefix => type of metadata requested (this may be useful if
  #   not all records are available in all formats)
  def find(selector, options={})
    raise NotImplementedError
  end

  # The base implementation always answers false.
  def deleted?
    false
  end

  # May return a String or an Array of XML Strings to add to the OAI
  # record chunk; the base implementation returns nothing.
  def about(record)
    nil
  end
end
# Paging wrapper that snapshots the ids of a result set into the
# OaiToken/OaiEntry tables on the first page of a harvest and serves later
# pages from that snapshot. Cached tokens older than `expire` are destroyed
# at the start of every #find.
#
# Options: everything ActiveRecordWrapper accepts, plus
# * :timeout => lifetime of a cached token (defaults to 12.hours)
#
# NOTE(review): unlike ActiveRecordWrapper#find, #find here queries `model`
# directly and does not apply the :set option -- confirm whether that is
# intentional.
class ActiveRecordCachingWrapper < ActiveRecordWrapper

  attr_reader :model, :timestamp_field, :expire

  def initialize(model, options={})
    @expire = options.delete(:timeout) || 12.hours
    super(model, options)
  end

  # selector is :all or a single id. With :all and more matches than
  # @limit, a PartialResult for page 0 is returned; otherwise the relation.
  # When options contains :resumption_token the page it names is returned.
  def find(selector, options={})
    sweep_cache
    return next_set(options[:resumption_token]) if options[:resumption_token]

    conditions = sql_conditions(options)

    if :all == selector
      total = model.where(conditions).count
      if @limit && total > @limit
        select_partial(
          ResumptionToken.new(options.merge({:last => 0})))
      else
        model.where(conditions)
      end
    else
      model.where(conditions).find(selector)
    end
  end

  protected

  # Resolve a resumption token string to its page of results.
  # The exception is referenced as OAI::ResumptionTokenException, the same
  # way ActiveRecordWrapper does, because this file is opened with
  # `module OAI::Provider`, which does not put OAI itself in lexical scope.
  def next_set(token_string)
    raise OAI::ResumptionTokenException.new unless @limit

    token = ResumptionToken.parse(token_string)
    select_partial(token)
  end

  # select a subset of the result set, and return it with a
  # resumption token to get the next subset
  def select_partial(token)
    oaitoken = OaiToken.find_by(token: token.to_s)

    # First page of a new harvest: snapshot the matching ids into the cache.
    if 0 == token.last && oaitoken.nil?
      oaitoken = OaiToken.create!(token: token.to_s)
      OaiToken.connection.execute("insert into " +
        "#{OaiEntry.table_name} (oai_token_id, record_id) " +
        "select #{oaitoken.id}, id from #{model.table_name} where " +
        "#{OaiToken.sanitize_sql(token_conditions(token))}")
    end

    # A later page was requested but there is no cached token for it.
    raise OAI::ResumptionTokenException.new unless oaitoken

    total = model.where(token_conditions(token)).count
    # token offset should be nil if this is the last set
    offset = (token.last * @limit + @limit >= total) ? nil : token.last + 1
    PartialResult.new(
      hydrate_records(
        oaitoken.entries.limit(@limit).offset(token.last * @limit)),
      token.next(offset)
    )
  end

  # Destroy cached tokens created more than `expire` ago.
  def sweep_cache
    OaiToken.where(["created_at < ?", Time.now - expire]).destroy_all
  end

  # Load the model records referenced by the given cache entries.
  def hydrate_records(records)
    model.find(records.collect {|r| r.record_id })
  end

  # Conditions for a token; no id filter is added here because paging is
  # done with limit/offset over the cached entries in #select_partial.
  def token_conditions(token)
    sql_conditions token.to_conditions_hash
  end

  private

  # Expiry time for a token created at `creation`.
  def expires_at(creation)
    created = Time.parse(creation.strftime("%Y-%m-%d %H:%M:%S"))
    created.utc + expire
  end

end
# If custom 'timestamp_field' is used, be aware this will be an ActiveRecord
# attribute that we will limit on, so perhaps should be indexed appropriately.
#
# If custom `identifier_field` is used, be aware this will be an ActiveRecord
# attribute that we will sort on, and use in WHERE clauses with `=` as well as
# greater than/less than, so should be indexed appropriately.
#
# Recognised options:
# * :timestamp_field  => defaults to 'updated_at'
# * :identifier_field => defaults to model.primary_key, then "id"
# * :limit            => defaults to 100
#
# Raises ArgumentError when any other option key is supplied.
def initialize(model, options = {})
  # Consume options from a private copy so the caller's hash is left intact
  # (and so a frozen hash can be passed).
  options = options.dup

  @model = model
  @timestamp_field = options.delete(:timestamp_field) || 'updated_at'
  @identifier_field = options.delete(:identifier_field) || model.primary_key || "id"
  @limit = options.delete(:limit) || 100

  return if options.empty?

  raise ArgumentError, "Unsupported options [#{options.keys.join(', ')}]"
end
model.sets : [] 48 | end 49 | 50 | def find(selector, options={}) 51 | find_scope = find_scope(options) 52 | return next_set(find_scope, 53 | options[:resumption_token]) if options[:resumption_token] 54 | conditions = sql_conditions(options) 55 | if :all == selector 56 | total = find_scope.where(conditions).count 57 | if @limit && total > @limit 58 | select_partial(find_scope, 59 | ResumptionToken.new(options.merge({:last => 0}))) 60 | else 61 | find_scope.where(conditions) 62 | end 63 | else 64 | find_scope.where(conditions).where(identifier_field => selector).first 65 | end 66 | end 67 | 68 | def deleted?(record) 69 | if record.respond_to?(:deleted_at) 70 | return record.deleted_at 71 | elsif record.respond_to?(:deleted) 72 | return record.deleted 73 | end 74 | false 75 | end 76 | 77 | def respond_to?(m, *args) 78 | if m =~ /^map_/ 79 | model.respond_to?(m, *args) 80 | else 81 | super 82 | end 83 | end 84 | 85 | def method_missing(m, *args, &block) 86 | if m =~ /^map_/ 87 | model.send(m, *args, &block) 88 | else 89 | super 90 | end 91 | end 92 | 93 | protected 94 | 95 | def find_scope(options) 96 | return model unless options.key?(:set) 97 | 98 | # Find the set or return an empty scope 99 | set = find_set_by_spec(options[:set]) 100 | return model.limit(0) if set.nil? 
# select a subset of the result set, and return it with a
# resumption token to get the next subset
#
# find_scope - the relation (or model) to page through.
# token      - ResumptionToken describing the query and the last id served.
#
# Returns a PartialResult whose token carries the identifier of the last
# record on this page, or nil when this page is the final one.
def select_partial(find_scope, token)
  # Build the conditions once and reuse them for both the page and the count.
  remaining = find_scope.where(token_conditions(token))
  records = remaining
              .limit(@limit)
              .order("#{model.base_class.table_name}.#{identifier_field} asc")

  # `remaining` only contains records after the token's last id, so when it
  # fits in a single page there is nothing left to resume.
  total = remaining.count
  # token offset should be nil if this is the last set
  offset = (@limit >= total) ? nil : records.last.send(identifier_field)
  PartialResult.new(records, token.next(offset))
end
# build a sql conditions statement from the content
# of a resumption token. It is very important not to
# miss any changes as records may change scope as the
# harvest is in progress. To avoid losing any changes
# the last 'id' of the previous set is used as the
# filter to the next set.
#
# Returns [sql_string, bind_values] in the same shape as #sql_conditions.
def token_conditions(token)
  last_id = token.last_str
  clause, values = sql_conditions(token.to_conditions_hash)

  # "0" marks the first page: no id filter is needed yet.
  return [clause, values] if "0" == last_id

  # Now add last id constraint. Only join with AND when there is something
  # to join to, otherwise the statement would start with a dangling " AND".
  id_clause = "#{model.base_class.table_name}.#{identifier_field} > :id"
  clause = clause.empty? ? id_clause : "#{clause} AND #{id_clause}"

  [clause, values.merge(:id => last_id)]
end
# = OAI::Provider::PartialResult
#
# Value holder for one page of results: the records on the page and the
# resumption token for fetching the following page (nil when not given).
#
class PartialResult
  attr_reader :records, :token

  # records - the records belonging to this page.
  # token   - resumption token for the next page; defaults to nil.
  def initialize(records, token = nil)
    @records, @token = records, token
  end
end
# Strip the provider's "<prefix>:" namespace from the front of an OAI
# identifier and return the remainder. Identifiers that do not start with
# the namespace are returned unchanged; an occurrence of the namespace
# elsewhere in the identifier is left alone.
def extract_identifier(id)
  id.delete_prefix("#{provider.prefix}:")
end
# True when the request consists solely of a resumption token, false when
# no resumption token was supplied.
#
# Raises OAI::ArgumentException when a resumption token is combined with
# any other option.
def resumption?
  return false unless @options.key?(:resumption_token)
  return true if @options.size == 1

  raise OAI::ArgumentException.new
end
# Response for the GetRecord verb: looks up a single record by identifier
# and emits its header, plus metadata and about sections unless the record
# is deleted.
class GetRecord < RecordResponse
  required_parameters :identifier, :metadata_prefix

  # Raises OAI::IdException when the model finds no record for the id.
  def to_xml
    # :identifier is removed from options before they are handed to the model.
    id = extract_identifier(options.delete(:identifier))
    record = provider.model.find(id, options)
    raise OAI::IdException.new unless record

    response do |xml|
      xml.GetRecord do
        xml.record do
          header_for(record)
          data_for(record) unless deleted?(record)
          about_for(record) unless deleted?(record)
        end
      end
    end
  end
end
provider.email.respond_to?(:each) 12 | provider.email.each { |address| r.adminEmail address } 13 | else 14 | r.adminEmail provider.email.to_s 15 | end 16 | r.earliestDatestamp Time.parse(provider.model.earliest.to_s).utc.xmlschema 17 | r.deletedRecord provider.delete_support.to_s 18 | r.granularity provider.granularity 19 | r.description do 20 | r.tag! 'oai-identifier', 'xmlns' => 'http://www.openarchives.org/OAI/2.0/oai-identifier', 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', 'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/oai-identifier http://www.openarchives.org/OAI/2.0/oai-identifier.xsd' do 21 | r.scheme 'oai' 22 | r.repositoryIdentifier provider.prefix.gsub(/oai:/, '') 23 | r.delimiter ':' 24 | r.sampleIdentifier "#{provider.prefix}:#{provider.identifier}" 25 | end 26 | end 27 | if provider.description 28 | r.target! << provider.description 29 | end 30 | end 31 | 32 | end 33 | end 34 | 35 | end 36 | 37 | end 38 | -------------------------------------------------------------------------------- /lib/oai/provider/response/list_identifiers.rb: -------------------------------------------------------------------------------- 1 | module OAI::Provider::Response 2 | 3 | class ListIdentifiers < RecordResponse 4 | 5 | def to_xml 6 | result = provider.model.find(:all, options) 7 | 8 | # result may be an array of records, or a partial result 9 | records = result.respond_to?(:records) ? result.records : result 10 | 11 | raise OAI::NoMatchException.new if records.nil? or records.empty? 12 | 13 | response do |r| 14 | r.ListIdentifiers do 15 | records.each do |rec| 16 | header_for rec 17 | end 18 | 19 | # append resumption token for getting next group of records 20 | if result.respond_to?(:token) 21 | r.target! 
# Whether `record` can be delivered in the metadata format named `prefix`:
# always true for 'oai_dc', otherwise true when the record responds to
# "to_<prefix>" or "map_<prefix>".
def record_supports(record, prefix)
  return true if prefix == 'oai_dc'

  %W[to_#{prefix} map_#{prefix}].any? { |meth| record.respond_to?(meth) }
end
# Emit the ListSets response: one <set> with setSpec and setName (and
# setDescription when the set object responds to :description) per set
# returned by the model.
#
# Raises OAI::SetException when the model returns nil or an empty
# collection, so that a repository without sets never emits an empty
# ListSets element.
def to_xml
  # Ask the model once; the collection is reused for the check and the loop.
  sets = provider.model.sets
  raise OAI::SetException.new if sets.nil? || sets.none?

  response do |r|
    r.ListSets do
      sets.each do |set|
        r.set do
          r.setSpec set.spec
          r.setName set.name
          r.setDescription(set.description) if set.respond_to?(:description)
        end
      end
    end
  end
end
# Default value for the :from option: the model's earliest timestamp as a
# UTC Time, reduced to a Date when the request's :until parses to a Date
# so that both bounds have the same granularity.
def self.default_from(response)
  earliest = Time.parse(response.provider.model.earliest.to_s).utc
  upper_bound = response.options[:until]
  return earliest unless upper_bound

  parse_date(upper_bound).is_a?(Date) ? earliest.to_date : earliest
end
# Sets a record belongs to, always as something that can be iterated:
# [] when the record has no `sets` method or it returns nil/false, the
# value itself when it responds to :each, otherwise a one-element array.
def sets_for(record)
  return [] unless record.respond_to?(:sets)

  # Read once; record.sets may be an association or a computed value.
  sets = record.sets
  return [] unless sets

  sets.respond_to?(:each) ? sets : [sets]
end
15 | # 16 | # == The 'last' component: offset or ID/pk to resume from 17 | # 18 | # The `#last` component is an offset or ID to resume from. In the case of it being 19 | # an ID to resume from, this assumes that ID's are sortable and results are returned 20 | # in ID order, so that the 'last' ID can be used as the place to resume from. 21 | # 22 | # Originally it was assumed that #last was always an integer, but since existing 23 | # implementations (like ActiveRecordWrapper) used it as an ID, and identifiers and 24 | # primary keys are _not_ always integers (can be UUID etc), we have expanded to allow 25 | # any string value. 26 | # 27 | # However, for backwards compatibility #last always returns an integer (sometimes 0 if 28 | # actual last component is not an integer), and #last_str returns the full string version. 29 | # Trying to change #last itself to be string broke a lot of existing code in this gem 30 | # in mysterious ways. 31 | # 32 | # Also beware that in some cases the value 0/"0" seems to be a special value used 33 | # to signify some special case. A lot of "code archeology" going on here after significant 34 | # period of no maintenance to this gem. 
# extracts the metadata prefix from a token string
#
# The trailing ":<last>" component is removed first (using the same
# pattern .parse uses for it), then the first dot-separated part is
# returned. This way a token with no set/from/until part, such as
# "oai_dc:0", yields "oai_dc" rather than "oai_dc:0".
def self.extract_format(token_string)
  token_string.sub(/:[^ :]+\z/, '').split('.').first
end
# return a string representation of the token minus the offset/ID
#
# Q: Why does it eliminate the offset/id "last" on the end? Doesn't fully
# represent state without it, which is confusing. Not sure, but
# other code seems to rely on it, tests break if not.
#
# The trailing component is matched with the same pattern .parse uses
# for it, so string ids containing non-word characters (a UUID with
# hyphens, for example) are stripped just like integer offsets.
def to_s
  encode_conditions.sub(/:[^ :]+\z/, '')
end
# XML attributes for the resumptionToken element: completeListSize when
# a total is set, expirationDate (UTC, xmlschema format) when an
# expiration is set. Unset values are left out.
def hash_of_attributes
  candidates = {
    :completeListSize => total,
    :expirationDate => expiration && expiration.utc.xmlschema
  }
  candidates.select { |_name, value| value }
end
# get all matching nodes
#
# Returns an array of the nodes matching `path` in `doc`, using whichever
# parser `doc` belongs to. Returns [] for an unrecognised document type or
# when the libxml lookup returns nothing.
def xpath_all(doc, path)
  case parser_type(doc)
  when 'libxml'
    # Evaluate the expression once and reuse the result.
    nodes = doc.find(path)
    nodes ? nodes.to_a : []
  when 'rexml'
    REXML::XPath.match(doc, path)
  else
    []
  end
end
We'll eventually remove 46 | #this trap 47 | begin 48 | return node[attr_name] 49 | rescue 50 | return node.property(attr_name) 51 | end 52 | end 53 | end 54 | return nil 55 | end 56 | 57 | private 58 | 59 | # figure out what sort of object we should do xpath on 60 | def parser_type(x) 61 | case x.class.to_s 62 | when 'LibXML::XML::Document' 63 | return 'libxml' 64 | when 'LibXML::XML::Node' 65 | return 'libxml' 66 | when 'LibXML::XML::Node::Set' 67 | return 'libxml' 68 | when 'REXML::Element' 69 | return 'rexml' 70 | when 'REXML::Document' 71 | return 'rexml' 72 | end 73 | end 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /lib/test.rb: -------------------------------------------------------------------------------- 1 | require 'oai' 2 | 3 | buffer = "" 4 | start_time = Time.now() 5 | 6 | client = OAI::Client.new 'http://digitalcollections.library.oregonstate.edu/cgi-bin/oai.exe', :parser =>'libxml' 7 | 8 | last_check = Date.new(2006,9,5) 9 | records = client.list_records 10 | # :set => 'archives', :metadata_prefix => 'oai_dc', :from => last_check 11 | 12 | x = 0 13 | records.each do |record| 14 | #fields = record.serialize_metadata(record.metadata, "oai_dc", "Oai_Dc") 15 | #puts "Primary Title: " + fields.title[0] + "\n" 16 | puts "Identifier: " + record.header.identifier + "\n" 17 | x += 1 18 | end 19 | 20 | end_time = Time.now() 21 | 22 | puts buffer 23 | puts "Time to run: " + (end_time - start_time).to_s + "\n" 24 | puts "Records returned: " + x.to_s 25 | 26 | -------------------------------------------------------------------------------- /ruby-oai.gemspec: -------------------------------------------------------------------------------- 1 | Gem::Specification.new do |s| 2 | s.name = 'oai' 3 | s.version = '1.3.0' 4 | s.author = 'Ed Summers' 5 | s.email = 'ehs@pobox.com' 6 | s.homepage = 'http://github.com/code4lib/ruby-oai' 7 | s.platform = Gem::Platform::RUBY 8 | s.summary = 'A ruby library for working 
with the Open Archive Initiative Protocol for Metadata Harvesting (OAI-PMH)' 9 | s.license = 'MIT' 10 | s.require_path = 'lib' 11 | s.autorequire = 'oai' 12 | s.bindir = 'bin' 13 | s.executables = 'oai' 14 | 15 | s.add_dependency('builder', '>=3.1.0') 16 | s.add_dependency('faraday', "< 3") 17 | s.add_dependency("faraday-follow_redirects", ">= 0.3.0", "< 2") 18 | s.add_dependency("rexml") # rexml becomes bundled gem in ruby 3.0 19 | 20 | 21 | s.add_development_dependency "activerecord", ">= 5.2.0", "< 8.1" 22 | s.add_development_dependency "appraisal" 23 | s.add_development_dependency "webrick" 24 | 25 | 26 | s.files = %w(README.md Rakefile) + 27 | Dir.glob("{bin,test,lib}/**/*") + 28 | Dir.glob("examples/**/*.rb") 29 | end 30 | -------------------------------------------------------------------------------- /test/activerecord_provider/config/connection.rb: -------------------------------------------------------------------------------- 1 | require 'active_record' 2 | require 'logger' 3 | 4 | # Configure AR connection 5 | #ActiveRecord::Base.logger = Logger.new(STDOUT) 6 | 7 | if RUBY_PLATFORM == "java" 8 | require 'jdbc/sqlite3' 9 | Jdbc::SQLite3.load_driver 10 | end 11 | 12 | ActiveRecord::Migration.verbose = false 13 | ActiveRecord::Base.establish_connection :adapter => "sqlite3", 14 | :database => ":memory:" 15 | 16 | if ActiveRecord.version < Gem::Version.new("6.0.0") 17 | ActiveRecord::MigrationContext.new( 18 | File.join(File.dirname(__FILE__), '..', 'database') 19 | ).migrate 20 | elsif ActiveRecord.version < Gem::Version.new("7.2.0") 21 | ActiveRecord::MigrationContext.new( 22 | File.join(File.dirname(__FILE__), '..', 'database'), 23 | ActiveRecord::Base.connection.schema_migration 24 | ).migrate 25 | else 26 | ActiveRecord::MigrationContext.new( 27 | File.join(File.dirname(__FILE__), '..', 'database') 28 | ).migrate 29 | end 30 | 31 | -------------------------------------------------------------------------------- 
/test/activerecord_provider/database/0001_oaipmh_tables.rb: -------------------------------------------------------------------------------- 1 | class OaipmhTables < ActiveRecord::Migration[5.2] 2 | def self.up 3 | create_table :oai_tokens do |t| 4 | t.column :token, :string, :null => false 5 | t.column :created_at, :timestamp 6 | end 7 | 8 | create_table :oai_entries do |t| 9 | t.column :record_id, :integer, :null => false 10 | t.column :oai_token_id, :integer, :null => false 11 | end 12 | 13 | create_table :dc_langs do |t| 14 | t.column :name, :string 15 | t.column :updated_at, :datetime 16 | t.column :created_at, :datetime 17 | end 18 | 19 | dc_fields = proc do |t| 20 | t.column :title, :string 21 | t.column :creator, :string 22 | t.column :subject, :string 23 | t.column :description, :string 24 | t.column :contributor, :string 25 | t.column :publisher, :string 26 | t.column :date, :datetime 27 | t.column :type, :string 28 | t.column :format, :string 29 | t.column :source, :string 30 | t.column :dc_lang_id, :integer 31 | t.column :relation, :string 32 | t.column :coverage, :string 33 | t.column :rights, :string 34 | t.column :updated_at, :datetime 35 | t.column :created_at, :datetime 36 | t.column :deleted, :boolean, :default => false 37 | end 38 | 39 | create_table :exclusive_set_dc_fields do |t| 40 | dc_fields.call(t) 41 | t.column :set, :string 42 | end 43 | 44 | create_table :dc_fields, &dc_fields 45 | 46 | create_table :dc_fields_dc_sets, :id => false do |t| 47 | t.column :dc_field_id, :integer 48 | t.column :dc_set_id, :integer 49 | end 50 | 51 | create_table :dc_sets do |t| 52 | t.column :name, :string 53 | t.column :spec, :string 54 | t.column :description, :string 55 | end 56 | 57 | add_index :oai_tokens, [:token], :unique => true 58 | add_index :oai_tokens, :created_at 59 | add_index :oai_entries, [:oai_token_id] 60 | add_index :dc_fields, :updated_at 61 | add_index :dc_fields, :deleted 62 | add_index :dc_fields_dc_sets, [:dc_field_id, :dc_set_id] 63 | 
end 64 | 65 | def self.down 66 | drop_table :oai_tokens 67 | drop_table :dc_fields 68 | drop_table :dc_sets 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /test/activerecord_provider/helpers/providers.rb: -------------------------------------------------------------------------------- 1 | require 'active_record' 2 | require 'oai' 3 | 4 | Dir.glob(File.dirname(__FILE__) + "/../models/*.rb").each do |lib| 5 | require lib 6 | end 7 | 8 | class ARProvider < OAI::Provider::Base 9 | repository_name 'ActiveRecord Based Provider' 10 | repository_url 'http://localhost' 11 | record_prefix 'oai:test' 12 | source_model ActiveRecordWrapper.new(DCField) 13 | end 14 | 15 | class ARProviderCustomIdentifierField < OAI::Provider::Base 16 | repository_name 'ActiveRecord Based Provider' 17 | repository_url 'http://localhost' 18 | record_prefix 'oai:test' 19 | source_model ActiveRecordWrapper.new(DCField, identifier_field: "source") 20 | end 21 | 22 | class ARProviderWithScope < OAI::Provider::Base 23 | DATE_LESS_THAN_RESTRICTION = Time.parse("2007-03-12 19:30:22 UTC") 24 | 25 | repository_name 'ActiveRecord Based Provider' 26 | repository_url 'http://localhost' 27 | record_prefix 'oai:test' 28 | source_model ActiveRecordWrapper.new(DCField.where("date < ?", DATE_LESS_THAN_RESTRICTION).includes(:sets)) 29 | end 30 | 31 | class SimpleResumptionProvider < OAI::Provider::Base 32 | repository_name 'ActiveRecord Resumption Provider' 33 | repository_url 'http://localhost' 34 | record_prefix 'oai:test' 35 | source_model ActiveRecordWrapper.new(DCField, :limit => 25) 36 | end 37 | 38 | class SimpleResumptionProviderWithNonIntegerID < OAI::Provider::Base 39 | repository_name 'ActiveRecord Resumption Provider With Non-Integer ID' 40 | repository_url 'http://localhost' 41 | record_prefix 'oai:test' 42 | source_model ActiveRecordWrapper.new(DCField, :limit => 25, identifier_field: "source") 43 | end 44 | 45 | class CachingResumptionProvider 
< OAI::Provider::Base 46 | repository_name 'ActiveRecord Caching Resumption Provider' 47 | repository_url 'http://localhost' 48 | record_prefix 'oai:test' 49 | source_model ActiveRecordCachingWrapper.new(DCField, :limit => 25) 50 | end 51 | 52 | 53 | class ARLoader 54 | def self.load 55 | fixtures = YAML.load_file( 56 | File.join(File.dirname(__FILE__), '..', 'fixtures', 'dc.yml') 57 | ) 58 | fixtures.keys.sort.each do |key| 59 | lang = DCLang.create(name: fixtures[key].delete('language')) 60 | DCField.create(fixtures[key].merge(dc_lang: lang)) 61 | end 62 | end 63 | 64 | def self.unload 65 | DCField.delete_all 66 | DCLang.delete_all 67 | end 68 | end 69 | -------------------------------------------------------------------------------- /test/activerecord_provider/helpers/set_provider.rb: -------------------------------------------------------------------------------- 1 | # Extend ActiveRecordModel to support sets 2 | class SetModel < OAI::Provider::ActiveRecordWrapper 3 | 4 | # Return all available sets 5 | def sets 6 | DCSet.all 7 | end 8 | 9 | end 10 | 11 | class ARSetProvider < OAI::Provider::Base 12 | repository_name 'ActiveRecord Set Based Provider' 13 | repository_url 'http://localhost' 14 | record_prefix = 'oai:test' 15 | source_model SetModel.new(DCField, :timestamp_field => 'date') 16 | end 17 | 18 | class ARExclusiveSetProvider < OAI::Provider::Base 19 | repository_name 'ActiveRecord Set Based Provider' 20 | repository_url 'http://localhost' 21 | record_prefix = 'oai:test' 22 | source_model OAI::Provider::ActiveRecordWrapper.new( 23 | ExclusiveSetDCField, :timestamp_field => 'date') 24 | end 25 | -------------------------------------------------------------------------------- /test/activerecord_provider/helpers/transactional_test_case.rb: -------------------------------------------------------------------------------- 1 | class TransactionalTestCase < Test::Unit::TestCase 2 | 3 | def run(result, &block) 4 | # Handle the default "you have no tests" test if 
it turns up 5 | return if @method_name.to_s == "default_test" 6 | ActiveRecord::Base.transaction do 7 | load_fixtures 8 | result = super(result, &block) 9 | raise ActiveRecord::Rollback 10 | end 11 | result 12 | end 13 | 14 | protected 15 | 16 | def load_fixtures 17 | fixtures = YAML.load_file( 18 | File.join(File.dirname(__FILE__), '..', 'fixtures', 'dc.yml') 19 | ) 20 | disable_logging do 21 | fixtures.keys.sort.each do |key| 22 | lang = DCLang.create(name: fixtures[key].delete('language')) 23 | DCField.create(fixtures[key].merge(dc_lang: lang)) 24 | end 25 | end 26 | end 27 | 28 | def disable_logging 29 | logger = ActiveRecord::Base.logger 30 | ActiveRecord::Base.logger = nil 31 | yield 32 | ActiveRecord::Base.logger = logger 33 | end 34 | 35 | end -------------------------------------------------------------------------------- /test/activerecord_provider/models/dc_field.rb: -------------------------------------------------------------------------------- 1 | class DCField < ActiveRecord::Base 2 | self.inheritance_column = 'DONOTINHERIT' 3 | has_and_belongs_to_many :sets, 4 | :join_table => "dc_fields_dc_sets", 5 | :foreign_key => "dc_field_id", 6 | :class_name => "DCSet" 7 | 8 | belongs_to :dc_lang, class_name: "DCLang", optional: true 9 | 10 | default_scope -> { left_outer_joins(:dc_lang) } 11 | 12 | def language 13 | dc_lang&.name 14 | end 15 | end 16 | -------------------------------------------------------------------------------- /test/activerecord_provider/models/dc_lang.rb: -------------------------------------------------------------------------------- 1 | class DCLang < ActiveRecord::Base 2 | has_many :dc_fields 3 | end 4 | -------------------------------------------------------------------------------- /test/activerecord_provider/models/dc_set.rb: -------------------------------------------------------------------------------- 1 | class DCSet < ActiveRecord::Base 2 | has_and_belongs_to_many :dc_fields, 3 | :join_table => "dc_fields_dc_sets", 4 | 
:foreign_key => "dc_set_id", 5 | :class_name => "DCField" 6 | 7 | end -------------------------------------------------------------------------------- /test/activerecord_provider/models/exclusive_set_dc_field.rb: -------------------------------------------------------------------------------- 1 | class ExclusiveSetDCField < ActiveRecord::Base 2 | self.inheritance_column = 'DONOTINHERIT' 3 | 4 | def self.sets 5 | klass = Struct.new(:name, :spec) 6 | self.distinct.pluck(:set).compact.map do |spec| 7 | klass.new("Set #{spec}", spec) 8 | end 9 | end 10 | 11 | belongs_to :dc_lang, class_name: "DCLang", optional: true 12 | 13 | def language 14 | dc_lang&.name 15 | end 16 | 17 | end 18 | -------------------------------------------------------------------------------- /test/activerecord_provider/models/oai_token.rb: -------------------------------------------------------------------------------- 1 | class OaiToken < ActiveRecord::Base 2 | serialize :params 3 | end -------------------------------------------------------------------------------- /test/activerecord_provider/tc_activerecord_wrapper.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper_ar_provider' 2 | 3 | class ActiveRecordWrapperTest < TransactionalTestCase 4 | def test_sql_conditions_from_date 5 | input = "2005-12-25" 6 | expected = input.dup 7 | sql_template, sql_opts = sql_conditions(from: input) 8 | assert_equal "dc_fields.updated_at >= :from", sql_template 9 | assert_equal expected, sql_opts[:from] 10 | sql_template, sql_opts = sql_conditions(from: Date.strptime(input, "%Y-%m-%d")) 11 | assert_equal "dc_fields.updated_at >= :from", sql_template 12 | assert_equal expected, sql_opts[:from] 13 | end 14 | 15 | def test_sql_conditions_from_time 16 | input = "2005-12-25T00:00:00Z" 17 | expected = "2005-12-25 00:00:00" 18 | sql_template, sql_opts = sql_conditions(from: input) 19 | assert_equal "dc_fields.updated_at >= :from", sql_template 20 | assert_equal 
expected, sql_opts[:from] 21 | sql_template, sql_opts = sql_conditions(from: Time.strptime(input, "%Y-%m-%dT%H:%M:%S%Z")) 22 | assert_equal "dc_fields.updated_at >= :from", sql_template 23 | assert_equal expected, sql_opts[:from] 24 | end 25 | 26 | def test_sql_conditions_until_date 27 | input = "2005-12-25" 28 | expected = "2005-12-26" 29 | sql_template, sql_opts = sql_conditions(until: input) 30 | assert_equal "dc_fields.updated_at < :until", sql_template 31 | assert_equal expected, sql_opts[:until] 32 | sql_template, sql_opts = sql_conditions(until: Date.strptime(input, "%Y-%m-%d")) 33 | assert_equal "dc_fields.updated_at < :until", sql_template 34 | assert_equal expected, sql_opts[:until] 35 | end 36 | 37 | def test_sql_conditions_until_time 38 | input = "2005-12-25T00:00:00Z" 39 | expected = "2005-12-25 00:00:01" 40 | sql_template, sql_opts = sql_conditions(until: input) 41 | assert_equal "dc_fields.updated_at < :until", sql_template 42 | assert_equal expected, sql_opts[:until] 43 | sql_template, sql_opts = sql_conditions(until: Time.strptime(input, "%Y-%m-%dT%H:%M:%S%Z")) 44 | assert_equal "dc_fields.updated_at < :until", sql_template 45 | assert_equal expected, sql_opts[:until] 46 | end 47 | 48 | def test_sql_conditions_both 49 | input = "2005-12-25" 50 | sql_template, sql_opts = sql_conditions(from: input, until: input) 51 | assert_equal "dc_fields.updated_at >= :from AND dc_fields.updated_at < :until", sql_template 52 | end 53 | 54 | def setup 55 | @wrapper = OAI::Provider::ActiveRecordWrapper.new(DCField) 56 | end 57 | 58 | def sql_conditions(opts) 59 | @wrapper.send :sql_conditions, opts 60 | end 61 | end 62 | 63 | 64 | -------------------------------------------------------------------------------- /test/activerecord_provider/tc_ar_provider.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper_ar_provider' 2 | 3 | class ActiveRecordProviderTest < TransactionalTestCase 4 | 5 | def test_identify 6 | 
assert @provider.identify =~ /ActiveRecord Based Provider/ 7 | end 8 | 9 | def test_metadata_formats 10 | assert_nothing_raised { REXML::Document.new(@provider.list_metadata_formats) } 11 | doc = REXML::Document.new(@provider.list_metadata_formats) 12 | assert doc.elements['/OAI-PMH/ListMetadataFormats/metadataFormat/metadataPrefix'].text == 'oai_dc' 13 | end 14 | 15 | def test_metadata_formats_for_record 16 | record_id = DCField.first.id 17 | assert_nothing_raised { REXML::Document.new(@provider.list_metadata_formats(:identifier => "oai:test:#{record_id}")) } 18 | doc = REXML::Document.new(@provider.list_metadata_formats) 19 | assert doc.elements['/OAI-PMH/ListMetadataFormats/metadataFormat/metadataPrefix'].text == 'oai_dc' 20 | end 21 | 22 | def test_list_records 23 | assert_nothing_raised do 24 | REXML::Document.new(@provider.list_records(:metadata_prefix => 'oai_dc')) 25 | end 26 | doc = REXML::Document.new(@provider.list_records( 27 | :metadata_prefix => 'oai_dc')) 28 | assert_equal 100, doc.elements['OAI-PMH/ListRecords'].to_a.size 29 | end 30 | 31 | def test_list_records_scope 32 | @provider = ARProviderWithScope.new 33 | 34 | doc = nil 35 | assert_nothing_raised do 36 | doc = REXML::Document.new(@provider.list_records(:metadata_prefix => 'oai_dc')) 37 | end 38 | 39 | expected_count = DCField.where("date < ?", ARProviderWithScope::DATE_LESS_THAN_RESTRICTION).count 40 | assert_equal expected_count, doc.elements['OAI-PMH/ListRecords'].to_a.size 41 | end 42 | 43 | def test_invalid_set_raises_no_match 44 | assert_raises(OAI::NoMatchException) do 45 | @provider.list_records(:metadata_prefix => 'oai_dc', :set => "invalid_does_not_exist") 46 | end 47 | end 48 | 49 | def test_get_record_alternate_identifier_column 50 | @provider = ARProviderCustomIdentifierField.new 51 | 52 | record_id = DCField.first.send(@provider.class.model.identifier_field) 53 | 54 | doc = REXML::Document.new(@provider.get_record( 55 | :identifier => "oai:test:#{record_id}", :metadata_prefix => 
'oai_dc')) 56 | 57 | assert_equal "oai:test:#{record_id}", doc.elements['OAI-PMH/GetRecord/record/header/identifier'].text 58 | end 59 | 60 | def test_list_identifiers 61 | assert_nothing_raised { REXML::Document.new(@provider.list_identifiers) } 62 | doc = REXML::Document.new(@provider.list_identifiers) 63 | assert_equal 100, doc.elements['OAI-PMH/ListIdentifiers'].to_a.size 64 | end 65 | 66 | def test_get_record 67 | record_id = DCField.first.id 68 | assert_nothing_raised do 69 | REXML::Document.new(@provider.get_record( 70 | :identifier => "oai:test:#{record_id}", :metadata_prefix => 'oai_dc')) 71 | end 72 | doc = REXML::Document.new(@provider.get_record( 73 | :identifier => "#{record_id}", :metadata_prefix => 'oai_dc')) 74 | assert_equal "oai:test:#{record_id}", doc.elements['OAI-PMH/GetRecord/record/header/identifier'].text 75 | end 76 | 77 | def test_deleted 78 | record = DCField.first 79 | record.deleted = true; 80 | record.save 81 | doc = REXML::Document.new(@provider.get_record( 82 | :identifier => "oai:test:#{record.id}", :metadata_prefix => 'oai_dc')) 83 | assert_equal "oai:test:#{record.id}", doc.elements['OAI-PMH/GetRecord/record/header/identifier'].text 84 | assert_equal 'deleted', doc.elements['OAI-PMH/GetRecord/record/header'].attributes["status"] 85 | end 86 | 87 | def test_from 88 | first_id = DCField.order("id asc").first.id 89 | DCField.where("dc_fields.id < #{first_id + 90}").update_all(updated_at: Time.parse("January 1 2005")) 90 | 91 | DCField.where("dc_fields.id < #{first_id + 10}").update_all(updated_at: Time.parse("June 1 2005")) 92 | 93 | 94 | from_param = Time.parse("January 1 2006").getutc.iso8601 95 | 96 | doc = REXML::Document.new( 97 | @provider.list_records( 98 | :metadata_prefix => 'oai_dc', :from => from_param) 99 | ) 100 | assert_equal DCField.where(["dc_fields.updated_at >= ?", from_param]).size, 101 | doc.elements['OAI-PMH/ListRecords'].size 102 | 103 | doc = REXML::Document.new( 104 | @provider.list_records( 105 | 
:metadata_prefix => 'oai_dc', :from => Time.parse("May 30 2005").getutc.iso8601) 106 | ) 107 | assert_equal 20, doc.elements['OAI-PMH/ListRecords'].to_a.size 108 | end 109 | 110 | def test_until 111 | first_id = DCField.order(id: :asc).first.id 112 | DCField.where("dc_fields.id < ?", first_id + 10).update_all(updated_at: Time.parse("June 1 2005")) 113 | 114 | doc = REXML::Document.new( 115 | @provider.list_records( 116 | :metadata_prefix => 'oai_dc', :until => Time.parse("June 1 2005")) 117 | ) 118 | assert_equal 10, doc.elements['OAI-PMH/ListRecords'].to_a.size 119 | end 120 | 121 | def test_from_and_until 122 | first_id = DCField.order(id: :asc).first.id 123 | DCField.update_all(updated_at: Time.parse("June 1 2005")) 124 | DCField.where("dc_fields.id < ?", first_id + 50).update_all(updated_at: Time.parse("June 15 2005")) 125 | DCField.where("dc_fields.id < ?", first_id + 10).update_all(updated_at: Time.parse("June 30 2005")) 126 | 127 | doc = REXML::Document.new( 128 | @provider.list_records( 129 | :metadata_prefix => 'oai_dc', 130 | :from => Time.parse("June 3 2005").getutc.iso8601, 131 | :until => Time.parse("June 16 2005").getutc.iso8601) 132 | ) 133 | assert_equal 40, doc.elements['OAI-PMH/ListRecords'].to_a.size 134 | end 135 | 136 | def test_bad_until_raises_exception 137 | DCField.order(id: :asc).limit(10).update_all(updated_at: 1.year.ago) 138 | DCField.order(id: :desc).limit(10).update_all(updated_at: 1.year.from_now) 139 | badTimes = [ 140 | 'junk', 141 | 'February 92nd, 2015'] 142 | badTimes.each do |time| 143 | assert_raise(OAI::ArgumentException) do 144 | @provider.list_records(:metadata_prefix => 'oai_dc', :until => time) 145 | end 146 | end 147 | end 148 | 149 | def test_bad_from_raises_exception 150 | DCField.order(id: :asc).limit(10).update_all(updated_at: 1.year.ago) 151 | DCField.order(id: :desc).limit(10).update_all(updated_at: 1.year.from_now) 152 | 153 | badTimes = [ 154 | 'junk', 155 | 'February 92nd, 2015'] 156 | badTimes.each do |time| 
157 | assert_raise(OAI::ArgumentException) do 158 | @provider.list_records(:metadata_prefix => 'oai_dc', :from => time) 159 | end 160 | end 161 | end 162 | 163 | def test_handles_empty_collections 164 | DCField.delete_all 165 | assert DCField.count == 0 166 | # Identify and ListMetadataFormats should return normally 167 | test_identify 168 | test_metadata_formats 169 | # ListIdentifiers and ListRecords should return "noRecordsMatch" error code 170 | assert_raises(OAI::NoMatchException) do 171 | REXML::Document.new(@provider.list_identifiers) 172 | end 173 | assert_raises(OAI::NoMatchException) do 174 | REXML::Document.new(@provider.list_records(:metadata_prefix => 'oai_dc')) 175 | end 176 | end 177 | 178 | def test_bad_id_raises_exception 179 | badIdentifiers = [ 180 | 'invalid"id', 181 | 'oai:test/5000', 182 | 'oai:test/-1', 183 | 'oai:test/one', 184 | 'oai:test/\\$1\1!'] 185 | badIdentifiers.each do |id| 186 | assert_raise(OAI::IdException) do 187 | @provider.get_record(:identifier => id, :metadata_prefix => 'oai_dc') 188 | end 189 | end 190 | end 191 | 192 | 193 | def setup 194 | @provider = ARProvider.new 195 | end 196 | 197 | end 198 | 199 | 200 | -------------------------------------------------------------------------------- /test/activerecord_provider/tc_ar_sets_provider.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper_ar_provider' 2 | 3 | class ActiveRecordSetProviderTest < TransactionalTestCase 4 | 5 | def test_list_sets 6 | doc = REXML::Document.new(@provider.list_sets) 7 | sets = doc.elements["/OAI-PMH/ListSets"] 8 | assert sets.size == 4 9 | assert sets[0].elements["//setName"].text == "Set A" 10 | end 11 | 12 | def test_set_a 13 | doc = REXML::Document.new(@provider.list_records( 14 | :metadata_prefix => 'oai_dc', :set => "A")) 15 | assert_equal 20, doc.elements['OAI-PMH/ListRecords'].to_a.size 16 | end 17 | 18 | def test_set_b 19 | doc = REXML::Document.new(@provider.list_records( 20 | 
:metadata_prefix => 'oai_dc', :set => "B")) 21 | assert_equal 10, doc.elements['OAI-PMH/ListRecords'].to_a.size 22 | end 23 | 24 | def test_set_ab 25 | doc = REXML::Document.new(@provider.list_records( 26 | :metadata_prefix => 'oai_dc', :set => "A:B")) 27 | assert_equal 10, doc.elements['OAI-PMH/ListRecords'].to_a.size 28 | end 29 | 30 | def test_record_with_multiple_sets 31 | record = DCSet.where("spec = 'C'").first.dc_fields.first 32 | assert_equal 2, record.sets.size 33 | end 34 | 35 | def test_missing_set 36 | assert_raise(OAI::NoMatchException) do 37 | doc = REXML::Document.new(@provider.list_records( 38 | :metadata_prefix => 'oai_dc', :set => "D")) 39 | end 40 | end 41 | 42 | def setup 43 | @provider = ARSetProvider.new 44 | define_sets 45 | end 46 | 47 | def define_sets 48 | set_a = DCSet.create(:name => "Set A", :spec => "A") 49 | set_b = DCSet.create(:name => "Set B", :spec => "B") 50 | set_c = DCSet.create(:name => "Set C", :spec => "C") 51 | set_ab = DCSet.create(:name => "Set A:B", :spec => "A:B") 52 | 53 | next_id = 0 54 | DCField.limit(10).order(id: :asc).each do |record| 55 | set_a.dc_fields << record 56 | next_id = record.id 57 | end 58 | 59 | DCField.where("dc_fields.id > ?", next_id).limit(10).order(id: :asc).each do |record| 60 | set_b.dc_fields << record 61 | next_id = record.id 62 | end 63 | 64 | DCField.where("dc_fields.id > ?", next_id).limit(10).order(id: :asc).each do |record| 65 | set_ab.dc_fields << record 66 | next_id = record.id 67 | end 68 | 69 | DCField.where("dc_fields.id > ?", next_id).limit(10).order(id: :asc).each do |record| 70 | set_a.dc_fields << record 71 | set_c.dc_fields << record 72 | next_id = record.id 73 | end 74 | end 75 | end 76 | 77 | 78 | class ActiveRecordExclusiveSetsProviderTest < TransactionalTestCase 79 | 80 | def test_list_sets 81 | doc = REXML::Document.new(@provider.list_sets) 82 | sets = doc.elements["/OAI-PMH/ListSets"] 83 | assert_equal 3, sets.size 84 | assert_equal "Set A", 
sets[0].elements["//setName"].text 85 | end 86 | 87 | def test_set_a 88 | doc = REXML::Document.new(@provider.list_records( 89 | :metadata_prefix => 'oai_dc', :set => "A")) 90 | assert_equal 20, doc.elements['OAI-PMH/ListRecords'].to_a.size 91 | end 92 | 93 | def test_set_b 94 | doc = REXML::Document.new(@provider.list_records( 95 | :metadata_prefix => 'oai_dc', :set => "B")) 96 | assert_equal 10, doc.elements['OAI-PMH/ListRecords'].to_a.size 97 | end 98 | 99 | def test_set_ab 100 | doc = REXML::Document.new(@provider.list_records( 101 | :metadata_prefix => 'oai_dc', :set => "A:B")) 102 | assert_equal 10, doc.elements['OAI-PMH/ListRecords'].to_a.size 103 | end 104 | 105 | def test_missing_set 106 | assert_raise(OAI::NoMatchException) do 107 | doc = REXML::Document.new(@provider.list_records( 108 | :metadata_prefix => 'oai_dc', :set => "D")) 109 | end 110 | end 111 | 112 | def setup 113 | @provider = ARExclusiveSetProvider.new 114 | define_sets 115 | end 116 | 117 | def define_sets 118 | next_id = 0 119 | 120 | ExclusiveSetDCField.limit(10).order(id: :asc).each do |record| 121 | record.set = "A" 122 | record.save! 123 | next_id = record.id 124 | end 125 | 126 | ExclusiveSetDCField.where("id > ?", next_id).limit(10).order(id: :asc).each do |record| 127 | record.set = "B" 128 | record.save! 129 | next_id = record.id 130 | end 131 | 132 | ExclusiveSetDCField.where("id > ?", next_id).limit(10).order(id: :asc).each do |record| 133 | record.set = "A:B" 134 | record.save! 135 | next_id = record.id 136 | end 137 | 138 | ExclusiveSetDCField.where("id > ?", next_id).limit(10).order(id: :asc).each do |record| 139 | record.set = "A" 140 | record.save! 
141 | next_id = record.id 142 | end 143 | end 144 | 145 | protected 146 | 147 | def load_fixtures 148 | fixtures = YAML.load_file( 149 | File.join(File.dirname(__FILE__), 'fixtures', 'dc.yml') 150 | ) 151 | disable_logging do 152 | fixtures.keys.sort.each do |key| 153 | lang = DCLang.create(name: fixtures[key].delete('language')) 154 | ExclusiveSetDCField.create(fixtures[key].merge(dc_lang: lang)) 155 | end 156 | end 157 | end 158 | 159 | end 160 | -------------------------------------------------------------------------------- /test/activerecord_provider/tc_caching_paging_provider.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper_ar_provider' 2 | 3 | class CachingPagingProviderTest < TransactionalTestCase 4 | include REXML 5 | 6 | def test_full_harvest 7 | doc = Document.new(@provider.list_records(:metadata_prefix => 'oai_dc')) 8 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 9 | assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].size 10 | token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 11 | doc = Document.new(@provider.list_records(:resumption_token => token)) 12 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 13 | token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 14 | assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].size 15 | doc = Document.new(@provider.list_records(:resumption_token => token)) 16 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 17 | token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 18 | assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].size 19 | doc = Document.new(@provider.list_records(:resumption_token => token)) 20 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 21 | assert_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 22 | assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].to_a.size 23 | end 24 | 25 | def 
test_from_and_until 26 | first_id = DCField.order(id: :asc).first.id 27 | DCField.where("dc_fields.id <= ?", first_id + 25).update_all(updated_at: Time.parse("September 15 2005")) 28 | DCField.where(["dc_fields.id < ? and dc_fields.id > ?", first_id + 50, first_id + 25]).update_all(updated_at: Time.parse("November 1 2005")) 29 | 30 | # Should return 50 records broken into 2 groups of 25. 31 | doc = Document.new( 32 | @provider.list_records( 33 | :metadata_prefix => 'oai_dc', 34 | :from => Time.parse("September 1 2005"), 35 | :until => Time.parse("November 30 2005")) 36 | ) 37 | assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].size 38 | token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 39 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 40 | doc = Document.new(@provider.list_records(:resumption_token => token)) 41 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 42 | assert_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 43 | assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].to_a.size 44 | end 45 | 46 | def setup 47 | @provider = CachingResumptionProvider.new 48 | end 49 | 50 | end 51 | -------------------------------------------------------------------------------- /test/activerecord_provider/tc_simple_paging_provider.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper_ar_provider' 2 | 3 | class SimpleResumptionProviderTest < TransactionalTestCase 4 | include REXML 5 | 6 | def test_full_harvest 7 | doc = Document.new(@provider.list_records(:metadata_prefix => 'oai_dc')) 8 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 9 | assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].to_a.size 10 | token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 11 | 12 | doc = Document.new(@provider.list_records(:resumption_token => token)) 13 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 
14 | token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 15 | assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].to_a.size 16 | 17 | doc = Document.new(@provider.list_records(:resumption_token => token)) 18 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 19 | token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 20 | assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].to_a.size 21 | 22 | doc = Document.new(@provider.list_records(:resumption_token => token)) 23 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 24 | assert_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 25 | assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].to_a.size 26 | end 27 | 28 | def test_non_integer_identifiers_resumption 29 | @provider = SimpleResumptionProviderWithNonIntegerID.new 30 | 31 | doc = Document.new(@provider.list_records(:metadata_prefix => 'oai_dc')) 32 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 33 | assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].to_a.size 34 | token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 35 | 36 | next_doc = Document.new(@provider.list_records(:resumption_token => token)) 37 | assert_not_nil next_doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 38 | next_token = next_doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 39 | assert_equal 26, next_doc.elements["/OAI-PMH/ListRecords"].to_a.size 40 | 41 | assert_not_equal token, next_token 42 | end 43 | 44 | def test_from_and_until 45 | first_id = DCField.order("id asc").first.id 46 | DCField.where("dc_fields.id < ?", first_id + 25).update_all(updated_at: Time.parse("September 15 2005")) 47 | DCField.where(["dc_fields.id <= ? and dc_fields.id > ?", first_id + 50, first_id + 25]).update_all(updated_at: Time.parse("November 1 2005")) 48 | 49 | total = DCField.where(["dc_fields.updated_at >= ? 
# Minimal WEBrick-backed OAI-PMH endpoint that the client and harvester tests
# talk to over HTTP.
class ProviderServer
  # Provider exposed by the test endpoint; its model is a ComplexModel built
  # with a limit of 100.
  class ComplexClientProvider < OAI::Provider::Base
    repository_name 'Complex Provider'
    repository_url 'http://localhost'
    record_prefix 'oai:test'
    source_model ComplexModel.new(100)
  end

  # Seconds to wait for a graceful shutdown before the server thread is killed.
  STOP_TIMEOUT = 5

  attr_reader :consumed, :server

  # Builds (but does not start) a server bound to 127.0.0.1 on +port+ that
  # answers requests under +mount_point+ with the proc from #server_proc.
  def initialize(port, mount_point)
    @consumed = []
    @provider = ComplexClientProvider.new
    @server = WEBrick::HTTPServer.new(
      :BindAddress => '127.0.0.1',
      # File::NULL is the platform's null device, so logging is discarded on
      # any OS instead of assuming a Unix-only '/dev/null'.
      :Logger => WEBrick::Log.new(File::NULL),
      :AccessLog => [],
      :Port => port)
    @server.mount_proc(mount_point, server_proc)
  end

  # Port the server was configured with.
  def port
    @server.config[:Port]
  end

  # Runs the server loop in a background thread.
  def start
    @thread = Thread.new { @server.start }
  end

  # Stops the server, if it was started, and waits until its thread is done.
  #
  # The server is asked to shut down through WEBrick instead of having its
  # thread killed, and the call blocks until the thread has finished. If the
  # thread does not finish within STOP_TIMEOUT seconds it is killed as a
  # last resort.
  def stop
    return unless @thread

    @server.shutdown
    @thread.kill unless @thread.join(STOP_TIMEOUT)
    @thread = nil
  end

  # Starts a server, yields it to the block, and always stops it afterwards.
  def self.wrap(port = 3333, mount_point='/oai')
    server = new(port, mount_point)
    begin
      server.start
      yield(server)
    ensure
      server.stop
    end
  end

  protected

  # Request handler: feeds the query parameters to the provider and returns
  # its XML. Any error is printed and reported to the client as a 500 whose
  # body is the backtrace, so a failing test shows the server-side cause.
  def server_proc
    Proc.new do |req, res|
      begin
        res.body = @provider.process_request(req.query)
        res.status = 200
        res['Content-Type'] = 'text/xml'
      rescue => err
        puts err
        puts err.backtrace.join("\n")
        res.body = err.backtrace.join("\n")
        res.status = 500
      end
    end
  end

end
# Exercises OAI::Client#get_record against the local test provider.
class GetRecordTest < Test::Unit::TestCase

  # A plain GetRecord request returns a response whose record exposes the
  # header, metadata and about sections.
  def test_get_one
    client = OAI::Client.new 'http://localhost:3333/oai'
    response = client.get_record :identifier => 'oai:test:3'
    assert_kind_of OAI::GetRecordResponse, response
    assert_kind_of OAI::Record, response.record
    assert_kind_of REXML::Element, response.record._source
    assert_kind_of REXML::Element, response.record.metadata
    assert_kind_of OAI::Header, response.record.header
    assert_kind_of REXML::Element, response.record.about

    # minimal check that the header is working
    assert_equal 'oai:test:3', response.record.header.identifier

    # minimal check that the metadata is working
    #assert 'en', response.record.metadata.elements['.//dc:language'].text
    assert_equal 'Ruby OAI test data', response.record.about.elements['.//dc:publisher'].text
  end

  # A request without an identifier must be rejected with an
  # OAI::ArgumentException carrying the protocol's error text.
  def test_missing_identifier
    client = OAI::Client.new 'http://localhost:3333/oai'
    error = assert_raise(OAI::ArgumentException) do
      client.get_record :metadata_prefix => 'oai_dc'
    end
    assert_match(/The request includes illegal arguments/, error.to_s)
  end

  # The record requested here is expected to be reported as deleted.
  def test_deleted_record
    client = OAI::Client.new 'http://localhost:3333/oai'
    record = client.get_record :identifier => 'oai:test:275'
    assert record.deleted?
  end

end
40 | end 41 | 42 | def test_http_client_handles_trailing_slash_redirects 43 | # First, test that this works when mocking out Faraday client 44 | oai_response = <<-eos 45 | 46 | Mock OAI Provider 47 | http://nowhere.example.com 48 | 49 | eos 50 | 51 | stubs = TrailingSlashAwareStubs.new do |stub| 52 | stub.get('/oai/?verb=Identify') { [200, {}, oai_response] } 53 | stub.get('/oai?verb=Identify') { 54 | [301, { 55 | 'Location' => 'http://localhost:3334/oai/?verb=Identify' 56 | }, ''] 57 | } 58 | end 59 | 60 | faraday_stub = Faraday.new do |builder| 61 | require 'faraday/follow_redirects' 62 | builder.use Faraday::FollowRedirects::Middleware 63 | builder.adapter :test, stubs 64 | end 65 | 66 | client = OAI::Client.new 'http://localhost:3334/oai', :http => faraday_stub 67 | response = client.identify 68 | 69 | assert_kind_of OAI::IdentifyResponse, response 70 | assert_equal 'Mock OAI Provider [http://nowhere.example.com]', response.to_s 71 | assert_equal 2, stubs.consumed[:get].length 72 | assert_equal stubs.consumed[:get].first.path, '/oai' 73 | assert_equal stubs.consumed[:get].last.path, '/oai/' 74 | 75 | # Now try it with a real server and default Faraday client 76 | TrailingSlashProviderServer.wrap(3334) do |server| 77 | client = OAI::Client.new "http://localhost:#{server.port}/oai" 78 | response = client.identify 79 | 80 | assert_kind_of OAI::IdentifyResponse, response 81 | assert_equal 'Complex Provider [http://localhost]', response.to_s 82 | assert_equal 2, server.consumed.length 83 | assert_equal server.consumed.first.path, '/oai' 84 | assert_equal server.consumed.last.path, '/oai/' 85 | end 86 | end 87 | 88 | private 89 | 90 | class TrailingSlashProviderServer < ProviderServer 91 | def server_proc 92 | Proc.new do |req, res| 93 | @consumed << req 94 | case req.path 95 | when "/oai/" 96 | begin 97 | res.body = @provider.process_request(req.query) 98 | res.status = 200 99 | res['Content-Type'] = 'text/xml' 100 | rescue => err 101 | puts err 102 | puts 
err.backtrace.join("\n") 103 | res.body = err.backtrace.join("\n") 104 | res.status = 500 105 | end 106 | else 107 | res.body = '' 108 | res.status = 301 109 | res['Location'] = "http://localhost:#{port}/oai/?#{req.query_string}" 110 | end 111 | res 112 | end 113 | end 114 | end 115 | 116 | class TrailingSlashAwareStubs < Faraday::Adapter::Test::Stubs 117 | attr_reader :consumed 118 | 119 | # ensure leading, but not trailing slash 120 | def normalize_path(path) 121 | path = '/' + path if path.index('/') != 0 122 | #path = path.sub('?', '/?') 123 | #path = path + '/' unless $& 124 | path.gsub('//', '/') 125 | end 126 | 127 | end 128 | end 129 | 130 | -------------------------------------------------------------------------------- /test/client/tc_identify.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper_client' 2 | 3 | class IdentifyTest < Test::Unit::TestCase 4 | 5 | def test_ok 6 | client = OAI::Client.new 'http://localhost:3333/oai' 7 | response = client.identify 8 | assert_kind_of OAI::IdentifyResponse, response 9 | assert_equal 'Complex Provider [http://localhost]', response.to_s 10 | #assert_equal 'PubMed Central (PMC3 - NLM DTD) [http://www.pubmedcentral.gov/oai/oai.cgi]', response.to_s 11 | end 12 | 13 | end 14 | -------------------------------------------------------------------------------- /test/client/tc_libxml.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper_client' 2 | 3 | class LibXMLTest < Test::Unit::TestCase 4 | 5 | def test_oai_exception 6 | return unless have_libxml 7 | 8 | uri = 'http://localhost:3333/oai' 9 | client = OAI::Client.new uri, :parser => 'libxml' 10 | assert_raises(OAI::IdException) {client.get_record(:identifier => 'nosuchid')} 11 | end 12 | 13 | def test_list_records 14 | return unless have_libxml 15 | 16 | # since there is regex magic going on to remove default oai namespaces 17 | # it's worth trying a few 
different oai targets 18 | oai_targets = %w{ 19 | http://localhost:3333/oai 20 | } 21 | 22 | #oai_targets = %w{ 23 | # http://etd.caltech.edu:80/ETD-db/OAI/oai 24 | # http://ir.library.oregonstate.edu/dspace-oai/request 25 | # http://memory.loc.gov/cgi-bin/oai2_0 26 | # http://libeprints.open.ac.uk/perl/oai2 27 | #} 28 | 29 | 30 | oai_targets.each do |uri| 31 | client = OAI::Client.new uri, :parser => 'libxml' 32 | records = client.list_records 33 | records.each do |record| 34 | assert record.header.identifier 35 | next if record.deleted? 36 | assert_kind_of LibXML::XML::Node, record.metadata 37 | end 38 | end 39 | end 40 | 41 | def test_deleted_record 42 | return unless have_libxml 43 | 44 | uri = 'http://localhost:3333/oai' 45 | client = OAI::Client.new(uri, :parser => 'libxml') 46 | response = client.get_record :identifier => 'oai:test:275' 47 | assert response.record.deleted? 48 | end 49 | 50 | private 51 | 52 | def have_libxml 53 | begin 54 | require 'xml/libxml' 55 | return true 56 | rescue LoadError 57 | return false 58 | end 59 | end 60 | 61 | end 62 | -------------------------------------------------------------------------------- /test/client/tc_list_identifiers.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper_client' 2 | 3 | class ListIdentifiersTest < Test::Unit::TestCase 4 | 5 | def test_list_with_resumption_token 6 | client = OAI::Client.new 'http://localhost:3333/oai' 7 | 8 | # get a list of identifier headers 9 | response = client.list_identifiers :metadata_prefix => 'oai_dc' 10 | assert_kind_of OAI::ListIdentifiersResponse, response 11 | assert_kind_of OAI::Response, response 12 | assert response.entries.size > 0 13 | 14 | # make sure header is put together reasonably 15 | header = response.entries[0] 16 | assert_kind_of OAI::Header, header 17 | assert header.identifier 18 | assert header.datestamp 19 | assert header.set_spec 20 | 21 | # exercise a resumption token and make sure first 
identifier is different 22 | first_identifier = response.entries[0].identifier 23 | token = response.resumption_token 24 | assert_not_nil token 25 | response = client.list_identifiers :resumption_token => token 26 | assert response.entries.size > 0 27 | assert_not_equal response.entries[0].identifier, first_identifier 28 | end 29 | 30 | def test_list_full 31 | client = OAI::Client.new 'http://localhost:3333/oai' 32 | 33 | # get a list of identifier headers 34 | response = client.list_identifiers :metadata_prefix => 'oai_dc' 35 | assert_kind_of OAI::ListIdentifiersResponse, response 36 | assert_kind_of OAI::Response, response 37 | assert response.respond_to?(:full), "Should expose :full" 38 | 39 | # Check that it runs through the pages 40 | assert_equal 1150, response.full.count 41 | response.full.each do |header| 42 | assert_kind_of OAI::Header, header 43 | assert header.identifier 44 | assert header.datestamp 45 | assert header.set_spec 46 | end 47 | end 48 | 49 | def test_list_with_date_range 50 | client = OAI::Client.new 'http://localhost:3333/oai' 51 | from_date = Date.new(1998,1,1) 52 | until_date = Date.new(2002,1,1) 53 | response = client.list_identifiers :from => from_date, :until => until_date 54 | assert response.entries.size > 0 55 | end 56 | 57 | def test_list_with_datetime_range 58 | # xtcat should support higher granularity 59 | client = OAI::Client.new 'http://localhost:3333/oai' 60 | from_date = DateTime.new(2001,1,1) 61 | until_date = DateTime.now 62 | response = client.list_identifiers :from => from_date, :until => until_date 63 | assert response.entries.size > 0 64 | end 65 | 66 | def test_invalid_argument 67 | client = OAI::Client.new 'http://localhost:3333/oai' 68 | assert_raise(OAI::ArgumentException) {client.list_identifiers :foo => 'bar'} 69 | end 70 | 71 | end 72 | -------------------------------------------------------------------------------- /test/client/tc_list_metadata_formats.rb: 
# Exercises OAI::Client#list_records against the local test provider.
class ListRecordsTest < Test::Unit::TestCase

  # The first page of ListRecords yields OAI::Record entries.
  def test_list
    client = OAI::Client.new 'http://localhost:3333/oai'
    response = client.list_records
    assert_kind_of OAI::ListRecordsResponse, response
    assert response.entries.size > 0
    assert_kind_of OAI::Record, response.entries[0]
  end

  # response.full must walk every page of the result.
  def test_list_full
    client = OAI::Client.new 'http://localhost:3333/oai'

    response = client.list_records
    assert_kind_of OAI::ListRecordsResponse, response

    # Check that it runs through the pages. The result is materialized once
    # so the count and the type checks share a single pass over the pages
    # instead of calling response.full twice.
    records = response.full.to_a
    assert_equal 1150, records.size
    records.each do |record|
      assert_kind_of OAI::Record, record
    end
  end

end
# Checks that a day-granularity Date is accepted as a :from argument.
class LowResolutionDatesTest < Test::Unit::TestCase

  def test_low_res_date_parsing
    oai_client = OAI::Client.new('http://localhost:3333/oai')
    from_day = Date.new(2003, 1, 1)

    # Listing identifier headers from a plain Date must not raise.
    assert_nothing_raised do
      oai_client.list_identifiers(:from => from_day)
    end
  end

end
# Checks the OAI::XPath#xpath helper against both supported XML backends.
class XpathTest < Test::Unit::TestCase
  include OAI::XPath

  # REXML backend: an existing node returns its text, a missing node nil.
  def test_rexml
    require 'rexml/document'
    # Block form closes the fixture file once the document has been parsed.
    doc = File.open('test/test.xml') { |file| REXML::Document.new(file) }
    assert_equal '2006-09-11T14:33:15Z', xpath(doc, './/responseDate')
    assert_nil xpath(doc, './/foobar')
  end

  # LibXML backend: same expectations; skipped silently when the libxml
  # bindings are not installed.
  def test_libxml
    begin
      require 'xml/libxml'
    rescue LoadError
      # libxml not available so nothing to test!
      return
    end

    doc = LibXML::XML::Document.file('test/test.xml')
    assert_equal '2006-09-11T14:33:15Z', xpath(doc, './/responseDate')
    assert_nil xpath(doc, './/foobar')
  end

end
until_value = Time.now.utc - ONE_HOUR 9 | config = OpenStruct.new(sites: { 'test' => { 'url' => 'http://localhost:3333/oai' }}) 10 | OAI::Harvester::Harvest.new(config).start 11 | last = config.sites.dig('test', 'last') 12 | assert_kind_of Time, last 13 | assert last >= (until_value + ONE_HOUR), "#{last} < #{(until_value + ONE_HOUR)}" 14 | end 15 | 16 | def test_harvest_from_last 17 | from_value = Time.parse(LATEST_FIXTURE).utc 18 | now = Time.now.utc 19 | config = OpenStruct.new(sites: { 'test' => { 'url' => 'http://localhost:3333/oai' }}) 20 | OAI::Harvester::Harvest.new(config, nil, from_value).start 21 | last = config.sites.dig('test', 'last') 22 | assert last >= now, "#{last} < #{now}" 23 | end 24 | 25 | def test_harvest_after_last 26 | from_value = Time.parse(LATEST_FIXTURE).utc + 1 27 | config = OpenStruct.new(sites: { 'test' => { 'url' => 'http://localhost:3333/oai' }}) 28 | OAI::Harvester::Harvest.new(config, nil, from_value).start 29 | last = config.sites.dig('test', 'last') 30 | assert_kind_of NilClass, last 31 | end 32 | 33 | def test_harvest_with_until 34 | until_value = Time.parse(EARLIEST_FIXTURE).utc + ONE_HOUR 35 | config = OpenStruct.new(sites: { 'test' => { 'url' => 'http://localhost:3333/oai' }}) 36 | OAI::Harvester::Harvest.new(config, nil, nil, until_value).start 37 | last = config.sites.dig('test', 'last') 38 | assert_kind_of Time, last 39 | assert_equal last, until_value 40 | end 41 | end 42 | 43 | -------------------------------------------------------------------------------- /test/harvester/test_helper_harvester.rb: -------------------------------------------------------------------------------- 1 | require 'oai' 2 | require 'oai/harvester' 3 | require 'test/unit' 4 | 5 | require File.dirname(__FILE__) + '/../client/helpers/provider' 6 | require File.dirname(__FILE__) + '/../client/helpers/test_wrapper' 7 | -------------------------------------------------------------------------------- /test/provider/models.rb: 
# Plain in-memory record used by the provider test models.
class Record
  # attr_accessor already provides #id, so no separate override is needed.
  attr_accessor :id, :titles, :creator, :tags, :sets, :updated_at, :deleted

  # Only +id+ is required; every other field has a placeholder default.
  # +sets+ may be nil, a single set object, or a collection of set objects.
  def initialize(id,
      titles = 'title',
      creator = 'creator',
      tags = 'tag',
      sets = nil,
      deleted = false,
      updated_at = Time.now.utc.xmlschema)

    @id = id
    @titles = titles
    @creator = creator
    @tags = tags
    @sets = sets
    @deleted = deleted
    @updated_at = updated_at
  end

  # Returns true when the record belongs to a set whose spec equals +spec+.
  #
  # A record without sets (the constructor default) is in no set and yields
  # false instead of raising NoMethodError on nil.
  def in_set(spec)
    memberships = @sets.respond_to?(:each) ? @sets : [@sets].compact
    memberships.each { |set| return true if set.spec == spec }
    false
  end
end
|| rec.updated_at <= opts[:until].to_time)) 81 | #else 82 | # ((opts[:set].nil? || rec.in_set(opts[:set])) && 83 | # (opts[:from].nil? || rec.updated_at >= opts[:from]) && 84 | # (opts[:until].nil? || rec.updated_at <= opts[:until])) 85 | #end 86 | end 87 | 88 | if @limit && records.size > @limit 89 | @groups = generate_chunks(records, @limit) 90 | return PartialResult.new(@groups[0], 91 | ResumptionToken.new(opts.merge({:last => 1}))) 92 | end 93 | return records 94 | end 95 | else 96 | begin 97 | @records.each do |record| 98 | return record if record.id.to_s == selector 99 | end 100 | rescue 101 | end 102 | nil 103 | end 104 | end 105 | 106 | def generate_chunks(records, limit) 107 | groups = [] 108 | records.each_slice(limit) do |group| 109 | groups << group 110 | end 111 | groups 112 | end 113 | 114 | def generate_records(number, timestamp = Time.now.utc.xmlschema, sets = [], deleted = false) 115 | @earliest = timestamp.dup if @earliest.nil? || timestamp.to_s < @earliest.to_s 116 | @earliest = timestamp.dup if @earliest.nil? 117 | 118 | # Add any sets we don't already have 119 | sets = [sets] unless sets.respond_to?(:each) 120 | sets.each do |set| 121 | @sets << set unless @sets.include?(set) 122 | end 123 | 124 | # Generate some records 125 | number.times do |id| 126 | rec = Record.new(@records.size, "title_#{id}", "creator_#{id}", "tag_#{id}") 127 | rec.updated_at = timestamp.utc 128 | rec.sets = sets 129 | rec.deleted = deleted 130 | @records << rec 131 | end 132 | end 133 | 134 | end 135 | 136 | class SimpleModel < TestModel 137 | 138 | def initialize 139 | super 140 | # Create a couple of sets 141 | set_one = OAI::Set.new() 142 | set_one.name = "Test Set One" 143 | set_one.spec = "A" 144 | set_one.description = "This is test set one." 145 | 146 | set_two = OAI::Set.new() 147 | set_two.name = "Test Set Two" 148 | set_two.spec = "A:B" 149 | set_two.description = "This is test set two." 
# Test model holding five batches of 100 records, one batch per listed date.
class BigModel < TestModel

  def initialize(limit = nil)
    super(limit)
    batch_dates = [
      "October 2 2000",
      "November 2 2000",
      "December 2 2000",
      "January 2 2001",
      "February 2 2001"
    ]
    batch_dates.each do |date|
      generate_records(100, Time.parse(date))
    end
  end

end
# Verifies that the provider reports protocol errors for malformed requests.
class ProviderExceptions < Test::Unit::TestCase

  def setup
    @provider = ComplexProvider.new
  end

  # Identify takes no arguments, so passing one is an argument error.
  def test_argument_exception
    assert_raise(OAI::ArgumentException) do
      @provider.identify(:identifier => 'invalid_arg')
    end
  end

  # Unparseable resumption tokens are rejected for both list verbs.
  def test_resumption_token_exception
    ['aaadddd:1000', 'oai_dc:1000'].each do |bad_token|
      assert_raise(OAI::ResumptionTokenException) do
        @provider.list_records(:resumption_token => bad_token)
      end
    end

    ['..::!:.:!:', '\:\\:\/$%^&*!@#!:1'].each do |bad_token|
      assert_raise(OAI::ResumptionTokenException) do
        @provider.list_identifiers(:resumption_token => bad_token)
      end
    end
  end

  # Unknown verbs produce a response mentioning badVerb rather than raising.
  def test_bad_verb_raises_exception
    unknown_verbs = ['BadVerb', '\a$#^%!@', 'identity', '!!\\$\$\.+']
    unknown_verbs.each do |verb|
      assert_match(/badVerb/, @provider.process_request(:verb => verb))
    end
  end

  def test_bad_format_raises_exception
    assert_raise(OAI::FormatException) do
      @provider.get_record(:identifier => 'oai:test:1', :metadata_prefix => 'html')
    end
  end

  # Both ListRecords and GetRecord require a metadata prefix.
  def test_missing_format_raises_exception
    assert_raise(OAI::ArgumentException) { @provider.list_records() }
    assert_raise(OAI::ArgumentException) do
      @provider.get_record(:identifier => 'oai:test:1')
    end
  end

  # Identifiers that are out of range, non-numeric or malformed are rejected.
  def test_bad_id_raises_exception
    bad_identifiers = [
      'oai:test:5000',
      'oai:test:-1',
      'oai:test:one',
      'oai:test/one',
      'oai:test/1',
      'oai:test:\\$1\1!'
    ]
    bad_identifiers.each do |identifier|
      assert_raise(OAI::IdException) do
        @provider.get_record(:identifier => identifier, :metadata_prefix => 'oai_dc')
      end
    end
  end

  # A from date later than the until date can match nothing.
  def test_no_records_match_dates_that_are_out_of_range
    range_start = Time.parse("November 2 2000")
    range_end = Time.parse("November 1 2000")
    assert_raise(OAI::NoMatchException) do
      @provider.list_records(:metadata_prefix => 'oai_dc',
                             :from => range_start,
                             :until => range_end)
    end
  end

  def test_no_records_match_bad_set
    assert_raise(OAI::NoMatchException) do
      @provider.list_records(:metadata_prefix => 'oai_dc', :set => 'unknown')
    end
  end

end
'test_helper_provider' 2 | 3 | class ResumptionTokenFunctionalTest < Test::Unit::TestCase 4 | include REXML 5 | 6 | def setup 7 | @provider = ComplexProvider.new 8 | end 9 | 10 | def test_resumption_tokens 11 | assert_nothing_raised do 12 | Document.new(@provider.list_records(:metadata_prefix => 'oai_dc')) 13 | end 14 | doc = Document.new(@provider.list_records(:metadata_prefix => 'oai_dc')) 15 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 16 | assert_equal (@provider.model.limit + 1), doc.elements["/OAI-PMH/ListRecords"].to_a.size 17 | token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 18 | doc = Document.new(@provider.list_records(:resumption_token => token)) 19 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 20 | assert_equal (@provider.model.limit + 1), doc.elements["/OAI-PMH/ListRecords"].to_a.size 21 | end 22 | 23 | def test_from_and_until_with_resumption_tokens 24 | # Should return 550 records broken into 5 groups of 100, and a final group of 50. 
25 | # checked elements under ListRecords are limit + 1, accounting for the resumptionToken element 26 | assert_nothing_raised do 27 | Document.new(@provider.list_records(:metadata_prefix => 'oai_dc')) 28 | end 29 | doc = Document.new( 30 | @provider.list_records( 31 | :metadata_prefix => 'oai_dc', 32 | :from => Time.parse("September 1 2004"), 33 | :until => Time.parse("December 25 2005")) 34 | ) 35 | assert_equal (@provider.model.limit + 1), doc.elements["/OAI-PMH/ListRecords"].to_a.size 36 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 37 | token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 38 | 39 | 4.times do 40 | doc = Document.new(@provider.list_records(:resumption_token => token)) 41 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 42 | assert_equal (@provider.model.limit + 1), doc.elements["/OAI-PMH/ListRecords"].to_a.size 43 | token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 44 | end 45 | 46 | doc = Document.new(@provider.list_records(:resumption_token => token)) 47 | # assert that ListRecords includes remaining records and an empty resumption token 48 | assert_equal (551 % @provider.model.limit), doc.elements["/OAI-PMH/ListRecords"].to_a.size 49 | assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"] 50 | assert_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text 51 | end 52 | 53 | end 54 | -------------------------------------------------------------------------------- /test/provider/tc_instance_provider.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper_provider' 2 | 3 | class TestInstanceProvider < Test::Unit::TestCase 4 | 5 | # Prior to the commit introducing this code, the InstanceProvider#identify 6 | # method would instantiate a Response::Identify object, passing the 7 | # InstanceProvider class as the provider for the Response::Identify 8 | # instance. 
With the commit introducing this test, the 9 | # InstanceProvider#identify now passes the instance of InstanceProvider 10 | # to the instantiation of Response::Identify. 11 | # 12 | # Thus we can override, on an instance by instance basis, the behavior of a 13 | # response object. 14 | def test_instance_used_in_responses 15 | @url_path = "/stringy-mc-string-face" 16 | @instance_provider = InstanceProvider.new({ :provider_context => :instance_based, :url_path => @url_path }) 17 | 18 | xml = @instance_provider.identify 19 | doc = REXML::Document.new(xml) 20 | assert_equal "http://localhost#{@url_path}", doc.elements["OAI-PMH/Identify/baseURL"].text 21 | end 22 | 23 | def test_class_used_in_responses 24 | @url_path = "/stringy-mc-string-face" 25 | @instance_provider = InstanceProvider.new({ :provider_context => :class_based, :url_path => @url_path }) 26 | 27 | xml = @instance_provider.identify 28 | doc = REXML::Document.new(xml) 29 | assert_equal "http://localhost", doc.elements["OAI-PMH/Identify/baseURL"].text 30 | end 31 | 32 | def test_by_default_class_used_in_responses 33 | @url_path = "/stringy-mc-string-face" 34 | @instance_provider = InstanceProvider.new({ :url_path => @url_path }) 35 | 36 | xml = @instance_provider.identify 37 | doc = REXML::Document.new(xml) 38 | assert_equal "http://localhost", doc.elements["OAI-PMH/Identify/baseURL"].text 39 | end 40 | 41 | end 42 | -------------------------------------------------------------------------------- /test/provider/tc_provider.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper_provider' 2 | 3 | class OaiTest < Test::Unit::TestCase 4 | 5 | def setup 6 | @mapped_provider = MappedProvider.new 7 | @big_provider = BigProvider.new 8 | @described_provider = DescribedProvider.new 9 | end 10 | 11 | def test_additional_description 12 | doc = REXML::Document.new(@described_provider.identify) 13 | assert_equal "oai:test:13900", 
doc.elements['OAI-PMH/Identify/description/oai-identifier/sampleIdentifier'].text 14 | assert_not_nil doc.elements['OAI-PMH/Identify/my_custom_xml'] 15 | end 16 | 17 | def test_list_identifiers_for_correct_xml 18 | doc = REXML::Document.new(@mapped_provider.list_identifiers) 19 | assert_not_nil doc.elements['OAI-PMH/request'] 20 | assert_not_nil doc.elements['OAI-PMH/request/@verb'] 21 | assert_not_nil doc.elements['OAI-PMH/ListIdentifiers'] 22 | assert_not_nil doc.elements['OAI-PMH/ListIdentifiers/header'] 23 | assert_not_nil doc.elements['OAI-PMH/ListIdentifiers/header/identifier'] 24 | assert_not_nil doc.elements['OAI-PMH/ListIdentifiers/header/datestamp'] 25 | assert_not_nil doc.elements['OAI-PMH/ListIdentifiers/header/setSpec'] 26 | end 27 | 28 | def test_list_records_for_correct_xml 29 | doc = REXML::Document.new( 30 | @mapped_provider.list_records(:metadata_prefix => 'oai_dc')) 31 | assert_not_nil doc.elements['OAI-PMH/request'] 32 | assert_not_nil doc.elements['OAI-PMH/request/@verb'] 33 | assert_not_nil doc.elements['OAI-PMH/request/@metadata_prefix'] 34 | assert_not_nil doc.elements['OAI-PMH/ListRecords/record/header'] 35 | assert_not_nil doc.elements['OAI-PMH/ListRecords/record/metadata'] 36 | end 37 | 38 | def test_mapped_source 39 | assert_nothing_raised do 40 | REXML::Document.new( 41 | @mapped_provider.list_records(:metadata_prefix => 'oai_dc')) 42 | end 43 | doc = REXML::Document.new( 44 | @mapped_provider.list_records(:metadata_prefix => 'oai_dc')) 45 | assert_equal "title_0", doc.elements['OAI-PMH/ListRecords/record/metadata/oai_dc:dc/dc:creator'].text 46 | assert_equal "creator_0", doc.elements['OAI-PMH/ListRecords/record/metadata/oai_dc:dc/dc:title'].text 47 | assert_equal "tag_0", doc.elements['OAI-PMH/ListRecords/record/metadata/oai_dc:dc/dc:subject'].text 48 | end 49 | 50 | def test_from 51 | assert_nothing_raised do 52 | REXML::Document.new( 53 | @big_provider.list_records(:metadata_prefix => 'oai_dc')) 54 | end 55 | doc = 
REXML::Document.new( 56 | @big_provider.list_records( 57 | :metadata_prefix => 'oai_dc', 58 | :from => Time.parse("February 1 2001")) 59 | ) 60 | assert_equal 100, doc.elements['OAI-PMH/ListRecords'].to_a.size 61 | 62 | doc = REXML::Document.new( 63 | @big_provider.list_records( 64 | :metadata_prefix => 'oai_dc', 65 | :from => Time.parse("January 1 2001")) 66 | ) 67 | assert_equal 200, doc.elements['OAI-PMH/ListRecords'].to_a.size 68 | end 69 | 70 | def test_until 71 | assert_nothing_raised do 72 | REXML::Document.new( 73 | @big_provider.list_records(:metadata_prefix => 'oai_dc')) 74 | end 75 | doc = REXML::Document.new( 76 | @big_provider.list_records( 77 | :metadata_prefix => 'oai_dc', :until => Time.parse("November 1 2000")) 78 | ) 79 | assert_equal 100, doc.elements['OAI-PMH/ListRecords'].to_a.size 80 | end 81 | 82 | def test_from_and_until_match 83 | assert_nothing_raised do 84 | @big_provider.list_records( 85 | :metadata_prefix => 'oai_dc', 86 | :from => "2000-11-01T05:00:00Z", 87 | :until => "2000-11-30T05:00:00Z" 88 | ) 89 | end 90 | 91 | assert_nothing_raised do 92 | @big_provider.list_records( 93 | :metadata_prefix => 'oai_dc', 94 | :from => "2000-11-01", 95 | :until => "2000-11-30" 96 | ) 97 | end 98 | 99 | assert_raise(OAI::ArgumentException) do 100 | @big_provider.list_records( 101 | :metadata_prefix => 'oai_dc', 102 | :from => "2000-11-01T05:00:00Z", 103 | :until => "2000-11-30" 104 | ) 105 | end 106 | end 107 | 108 | def test_from_and_until 109 | assert_nothing_raised do 110 | REXML::Document.new( 111 | @big_provider.list_records(:metadata_prefix => 'oai_dc')) 112 | end 113 | doc = REXML::Document.new( 114 | @big_provider.list_records( 115 | :metadata_prefix => 'oai_dc', 116 | :from => Time.parse("November 1 2000"), 117 | :until => Time.parse("November 30 2000")) 118 | ) 119 | 120 | assert_not_nil doc.elements['OAI-PMH/request'] 121 | assert_not_nil doc.elements['OAI-PMH/request/@verb'] 122 | assert_not_nil doc.elements['OAI-PMH/request/@from'] 123 | 
assert_not_nil doc.elements['OAI-PMH/request/@until'] 124 | 125 | assert_equal 100, doc.elements['OAI-PMH/ListRecords'].to_a.size 126 | 127 | doc = REXML::Document.new( 128 | @big_provider.list_records( 129 | :metadata_prefix => 'oai_dc', 130 | :from => Time.parse("December 1 2000"), 131 | :until => Time.parse("December 31 2000")) 132 | ) 133 | assert_equal 100, doc.elements['OAI-PMH/ListRecords'].to_a.size 134 | end 135 | 136 | end 137 | -------------------------------------------------------------------------------- /test/provider/tc_resumption_tokens.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper_provider' 2 | 3 | class ResumptionTokenTest < Test::Unit::TestCase 4 | include REXML 5 | include OAI::Provider 6 | 7 | def setup 8 | @token = ResumptionToken.new( 9 | :from => Time.utc(2005,"jan",1,17,0,0), 10 | :until => Time.utc(2005,"jan",31,17,0,0), 11 | :set => "A", 12 | :metadata_prefix => "oai_dc", 13 | :last => 1 14 | ) 15 | end 16 | 17 | def test_resumption_token_options_encoding 18 | assert_equal "oai_dc.s(A).f(2005-01-01T17:00:00Z).u(2005-01-31T17:00:00Z)", 19 | @token.to_s 20 | end 21 | 22 | def test_resumption_token_next_method 23 | assert_equal 100, @token.next(100).last 24 | end 25 | 26 | def test_resumption_token_to_condition_hash 27 | hash = @token.to_conditions_hash 28 | assert_equal @token.from, hash[:from] 29 | assert_equal @token.until, hash[:until] 30 | assert_equal @token.set, hash[:set] 31 | assert_equal @token.prefix, hash[:metadata_prefix] 32 | end 33 | 34 | def test_resumption_token_parsing 35 | new_token = ResumptionToken.parse( 36 | "oai_dc.s(A).f(2005-01-01T17:00:00Z).u(2005-01-31T17:00:00Z):1" 37 | ) 38 | assert_equal @token, new_token 39 | end 40 | 41 | def test_resumption_token_to_xml 42 | doc = REXML::Document.new(@token.to_xml) 43 | assert_equal "#{@token.to_s}:#{@token.last}", doc.elements['/resumptionToken'].text 44 | end 45 | 46 | def 
test_resumption_token_id_does_not_need_to_be_numeric 47 | serialized = "oai_dc.s(A).f(2005-01-01T17:00:00Z).u(2005-01-31T17:00:00Z):FA129C" 48 | 49 | token = ResumptionToken.parse(serialized) 50 | assert_equal serialized, token.send(:encode_conditions) 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /test/provider/tc_simple_provider.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper_provider' 2 | 3 | class TestSimpleProvider < Test::Unit::TestCase 4 | 5 | def setup 6 | @simple_provider = SimpleProvider.new 7 | @model = @simple_provider.class.model 8 | end 9 | 10 | def test_identify 11 | doc = REXML::Document.new(@simple_provider.identify) 12 | assert_equal @simple_provider.class.name, 13 | doc.elements["/OAI-PMH/Identify/repositoryName"].text 14 | assert_equal SimpleModel.new.earliest.to_s, 15 | doc.elements["/OAI-PMH/Identify/earliestDatestamp"].text 16 | 17 | # PC 18 | # lambda { REXML::Document.new(@simple_provider.identify(:set => 'A')) } 19 | 20 | 21 | end 22 | 23 | def test_list_sets 24 | doc = REXML::Document.new(@simple_provider.list_sets) 25 | sets = doc.elements["/OAI-PMH/ListSets"] 26 | assert_equal @model.sets.size, sets.size 27 | assert_equal @model.sets[0].name, sets[0].elements["//setName"].text 28 | end 29 | 30 | def test_metadata_formats 31 | assert_nothing_raised { REXML::Document.new(@simple_provider.list_metadata_formats) } 32 | doc = REXML::Document.new(@simple_provider.list_metadata_formats) 33 | assert_equal "oai_dc", 34 | doc.elements['/OAI-PMH/ListMetadataFormats/metadataFormat/metadataPrefix'].text 35 | end 36 | 37 | def test_metadata_formats_for_document 38 | assert_nothing_raised { REXML::Document.new(@simple_provider.list_metadata_formats(:identifier => "oai:test:1")) } 39 | doc = REXML::Document.new(@simple_provider.list_metadata_formats) 40 | assert_equal "oai_dc", 41 | 
doc.elements['/OAI-PMH/ListMetadataFormats/metadataFormat/metadataPrefix'].text 42 | end 43 | 44 | def test_list_records_without_constraints 45 | assert_nothing_raised { REXML::Document.new(@simple_provider.list_records(:metadata_prefix => 'oai_dc')) } 46 | 47 | total = @model.find(:all).size 48 | doc = REXML::Document.new(@simple_provider.list_records(:metadata_prefix => 'oai_dc')) 49 | assert_equal total, doc.elements['OAI-PMH/ListRecords'].size 50 | end 51 | 52 | def test_list_records_with_from_constraints 53 | assert_nothing_raised { REXML::Document.new(@simple_provider.list_records(:metadata_prefix => 'oai_dc')) } 54 | 55 | total = @model.find(:all).size 56 | doc = REXML::Document.new(@simple_provider.list_records(:metadata_prefix => 'oai_dc', from: "2002-10-05")) 57 | assert_equal total, doc.elements['OAI-PMH/ListRecords'].size 58 | end 59 | 60 | def test_list_records_with_until_constraints 61 | assert_nothing_raised { REXML::Document.new(@simple_provider.list_records(:metadata_prefix => 'oai_dc')) } 62 | 63 | total = @model.find(:all).size 64 | doc = REXML::Document.new(@simple_provider.list_records(:metadata_prefix => 'oai_dc', until: "2002-11-05")) 65 | assert_equal total, doc.elements['OAI-PMH/ListRecords'].size 66 | end 67 | 68 | def test_list_records_with_set_equal_a 69 | total = @model.find(:all, :set => 'A').size 70 | doc = REXML::Document.new(@simple_provider.list_records(:metadata_prefix => 'oai_dc', :set => 'A')) 71 | assert_equal total, doc.elements['OAI-PMH/ListRecords'].size 72 | end 73 | 74 | def test_list_record_with_set_equal_ab 75 | total = @model.find(:all, :set => 'A:B').size 76 | doc = REXML::Document.new(@simple_provider.list_records(:metadata_prefix => 'oai_dc', :set => 'A:B')) 77 | assert_equal total, doc.elements['OAI-PMH/ListRecords'].size 78 | end 79 | 80 | def test_list_identifiers_without_constraints 81 | assert_nothing_raised { REXML::Document.new(@simple_provider.list_identifiers(:metadata_prefix => 'oai_dc')) } 82 | 83 | total 
= @model.find(:all).size 84 | doc = REXML::Document.new(@simple_provider.list_identifiers(:metadata_prefix => 'oai_dc')) 85 | assert_equal total, doc.elements['OAI-PMH/ListIdentifiers'].to_a.size 86 | end 87 | 88 | def test_list_identifiers_with_set_equal_a 89 | total = @model.find(:all, :set => 'A').size 90 | doc = REXML::Document.new(@simple_provider.list_identifiers(:metadata_prefix => 'oai_dc', :set => 'A')) 91 | assert_equal total, doc.elements['OAI-PMH/ListIdentifiers'].to_a.size 92 | end 93 | 94 | def test_list_indentifiers_with_set_equal_ab 95 | total = @model.find(:all, :set => 'A:B').size 96 | doc = REXML::Document.new(@simple_provider.list_identifiers(:metadata_prefix => 'oai_dc', :set => 'A:B')) 97 | assert_equal total, doc.elements['OAI-PMH/ListIdentifiers'].to_a.size 98 | end 99 | 100 | def test_get_record 101 | assert_nothing_raised do 102 | REXML::Document.new( 103 | @simple_provider.get_record( 104 | :identifier => 'oai:test:1', 105 | :metadataPrefix => 'oai_dc' 106 | ) 107 | ) 108 | end 109 | doc = REXML::Document.new( 110 | @simple_provider.get_record( 111 | :identifier => 'oai:test:1', 112 | :metadataPrefix => 'oai_dc' 113 | ) 114 | ) 115 | assert_equal 'oai:test:1', 116 | doc.elements['OAI-PMH/GetRecord/record/header/identifier'].text 117 | end 118 | 119 | def test_deleted_record 120 | assert_nothing_raised do 121 | REXML::Document.new( 122 | @simple_provider.get_record( 123 | :identifier => 'oai:test:6', 124 | :metadataPrefix => 'oai_dc' 125 | ) 126 | ) 127 | end 128 | doc = REXML::Document.new( 129 | @simple_provider.get_record( 130 | :identifier => 'oai:test:5', 131 | :metadataPrefix => 'oai_dc' 132 | ) 133 | ) 134 | assert_equal 'oai:test:5', doc.elements['OAI-PMH/GetRecord/record/header/identifier'].text 135 | assert_equal 'deleted', doc.elements['OAI-PMH/GetRecord/record/header'].attributes["status"] 136 | end 137 | 138 | end 139 | -------------------------------------------------------------------------------- 
/test/provider/test_helper_provider.rb: -------------------------------------------------------------------------------- 1 | require 'oai' 2 | require 'test/unit' 3 | 4 | require File.dirname(__FILE__) + '/models' 5 | include OAI 6 | 7 | class SimpleProvider < Provider::Base 8 | repository_name 'Test Provider' 9 | record_prefix 'oai:test' 10 | source_model SimpleModel.new 11 | end 12 | 13 | class BigProvider < Provider::Base 14 | repository_name 'Another Provider' 15 | record_prefix 'oai:test' 16 | source_model BigModel.new 17 | end 18 | 19 | class TokenProvider < Provider::Base 20 | repository_name 'Token Provider' 21 | record_prefix 'oai:test' 22 | source_model BigModel.new(25) 23 | end 24 | 25 | class MappedProvider < Provider::Base 26 | repository_name 'Mapped Provider' 27 | record_prefix 'oai:test' 28 | source_model MappedModel.new 29 | end 30 | 31 | class ComplexProvider < Provider::Base 32 | repository_name 'Complex Provider' 33 | repository_url 'http://localhost' 34 | record_prefix 'oai:test' 35 | source_model ComplexModel.new(100) 36 | end 37 | 38 | class DescribedProvider < Provider::Base 39 | repository_name 'Described PRovider' 40 | repository_url 'http://localhost' 41 | record_prefix 'oai:test' 42 | source_model SimpleModel.new 43 | sample_id '13900' 44 | extra_description "" 45 | end 46 | 47 | class InstanceProvider < Provider::Base 48 | repository_name 'Instance Provider' 49 | record_prefix 'oai:test' 50 | repository_url 'http://localhost' 51 | source_model SimpleModel.new 52 | 53 | def initialize(options = {}) 54 | super 55 | @url_path = options.fetch(:url_path) 56 | end 57 | attr_reader :url_path 58 | 59 | def url 60 | File.join(super, url_path) 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /test/test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 2006-09-11T14:33:15Z 4 | http://www.pubmedcentral.gov/oai/oai.cgi 5 | 6 | PubMed Central (PMC3 
- NLM DTD) 7 | http://www.pubmedcentral.gov/oai/oai.cgi 8 | 2.0 9 | oai@ncbi.nlm.nih.gov 10 | 1999-01-01 11 | no 12 | YYYY-MM-DD 13 | 14 | 15 | oai 16 | pubmedcentral.gov 17 | : 18 | oai:pubmedcentral.gov:13900 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /tools/generate_fixtures.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # 3 | # Created by William Groppe on 2007-01-17. 4 | require 'yaml' 5 | 6 | # Dublin Core fields 7 | FIELDS = %w{title creator subject description contributor publisher 8 | date type format source language relation coverage rights} 9 | 10 | unless ARGV[0] 11 | puts "Please specify how many records to generate." 12 | exit 13 | end 14 | 15 | # Hash for records 16 | records = {} 17 | 18 | ARGV[0].to_i.times do |i| 19 | records[i] = 20 | Hash[*FIELDS.collect { |field| [field, "#{field}_#{i}"] }.flatten] 21 | end 22 | 23 | puts records.to_yaml 24 | 25 | --------------------------------------------------------------------------------