├── Gemfile ├── lib ├── sec_query │ ├── version.rb │ ├── entity.rb │ ├── filing_detail.rb │ ├── sec_uri.rb │ └── filing.rb └── sec_query.rb ├── Rakefile ├── .gitignore ├── spec ├── support │ └── vcr.rb ├── spec_helper.rb └── sec_query │ ├── sec_uri_spec.rb │ ├── entity_spec.rb │ └── filing_spec.rb ├── .github └── workflows │ └── ruby.yml ├── sec_query.gemspec └── README.md /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | # Specify your gem's dependencies in sec_query.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /lib/sec_query/version.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | # => SecQuery::VERSION 3 | module SecQuery 4 | VERSION = "1.5.0" 5 | end 6 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require 'rspec/core/rake_task' 3 | 4 | RSpec::Core::RakeTask.new(:spec) 5 | 6 | task test: :spec 7 | 8 | task default: :test 9 | 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.gem 3 | *.rbc 4 | .bundle 5 | .config 6 | .yardoc 7 | .ruby-gemset 8 | .ruby-version 9 | Gemfile.lock 10 | InstalledFiles 11 | _yardoc 12 | coverage 13 | doc/ 14 | lib/bundler/man 15 | pkg 16 | pkg/* 17 | rdoc 18 | spec/support/vcr_cassettes 19 | spec/support/vcr_cassettes/* -------------------------------------------------------------------------------- /spec/support/vcr.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | 3 | require 'vcr' 4 | VCR.configure do |c| 5 | c.cassette_library_dir = 'spec/support/vcr_cassettes' 6 
| c.hook_into :webmock 7 | c.ignore_localhost = true 8 | c.default_cassette_options = { record: :new_episodes } 9 | c.configure_rspec_metadata! 10 | end 11 | -------------------------------------------------------------------------------- /lib/sec_query.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | 3 | # external 4 | require 'active_support/all' 5 | require 'addressable/uri' 6 | require 'open-uri' 7 | require 'rest-client' 8 | require 'rss' 9 | require 'nokogiri' 10 | require 'rubygems' 11 | 12 | # internal 13 | require 'sec_query/entity' 14 | require 'sec_query/filing' 15 | require 'sec_query/filing_detail' 16 | require 'sec_query/sec_uri' 17 | require 'sec_query/version' 18 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | 3 | require 'rubygems' 4 | require 'bundler/setup' 5 | require 'sec_query' 6 | require 'support/vcr' 7 | 8 | def is_valid?(entity) 9 | expect(entity).to_not be_nil 10 | expect(entity.name).to eq query[:name] 11 | expect(entity.cik).to eq query[:cik] 12 | entity.instance_variables.each do |key| 13 | expect(SecQuery::Entity::COLUMNS).to include(key[1..-1].to_sym) 14 | end 15 | end 16 | 17 | def is_valid_address?(address) 18 | expect(address).to_not be_nil 19 | address.keys.each do |key| 20 | expect(['city', 'state', 'street1', 'street2', 'type', 'zip', 'phone']).to include(key) 21 | end 22 | end 23 | 24 | def is_valid_filing?(filing) 25 | expect(filing).to_not be_nil 26 | filing.instance_variables.each do |key| 27 | expect(SecQuery::Filing::COLUMNS).to include(key[1..-1].to_sym) 28 | end 29 | end -------------------------------------------------------------------------------- /.github/workflows/ruby.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that 
are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake 6 | # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby 7 | 8 | name: Ruby 9 | 10 | on: 11 | push: 12 | branches: [ main ] 13 | pull_request: 14 | branches: [ main ] 15 | 16 | jobs: 17 | test: 18 | 19 | runs-on: ubuntu-latest 20 | strategy: 21 | matrix: 22 | ruby-version: ['2.6', '2.7', '3.0'] 23 | 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Set up Ruby 27 | # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby, 28 | # change this to (see https://github.com/ruby/setup-ruby#versioning): 29 | # uses: ruby/setup-ruby@v1 30 | uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e 31 | with: 32 | ruby-version: ${{ matrix.ruby-version }} 33 | bundler-cache: true # runs 'bundle install' and caches installed gems automatically 34 | - name: Run tests 35 | run: bundle exec rake 36 | -------------------------------------------------------------------------------- /sec_query.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | $:.push File.expand_path('../lib', __FILE__) 3 | 4 | require 'sec_query/version' 5 | 6 | Gem::Specification.new do |s| 7 | s.name = 'sec_query' 8 | s.version = SecQuery::VERSION 9 | s.authors = ['Ty Rauber'] 10 | s.email = ['tyrauber@mac.com'] 11 | s.license = 'MIT' 12 | s.homepage = 'https://github.com/tyrauber/sec_query' 13 | s.summary = 'A ruby gem for querying the United States Securities and Exchange Commission Edgar System.' 14 | s.description = 'Search for company or person, by name, symbol or Central Index Key (CIK), and retrieve filings.' 
15 | 16 | s.rubyforge_project = 'sec_query' 17 | s.files = `git ls-files`.split("\n") 18 | s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") 19 | s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } 20 | s.require_paths = ['lib'] 21 | 22 | s.add_development_dependency 'bundler', '~> 2.2.14' 23 | s.add_development_dependency 'rake' 24 | s.add_development_dependency 'rspec', '~> 3.10' 25 | s.add_development_dependency 'vcr', '~> 6.0' 26 | s.add_development_dependency 'webmock', '~> 3.12' 27 | s.add_development_dependency 'rubocop', '~> 1.11' 28 | s.add_runtime_dependency 'rest-client', '~> 2.1.0' 29 | s.add_runtime_dependency 'addressable', '~> 2.7' 30 | s.add_runtime_dependency 'nokogiri', '~> 1' 31 | s.add_runtime_dependency 'activesupport', '~> 5.2' 32 | s.add_runtime_dependency 'rss', '~> 0.2' 33 | end -------------------------------------------------------------------------------- /spec/sec_query/sec_uri_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | include SecQuery 4 | require 'spec_helper' 5 | 6 | describe SecQuery::SecURI do 7 | describe '#browse_edgar_uri' do 8 | it 'builds a default /browse-edgar/ url' do 9 | uri = SecQuery::SecURI.browse_edgar_uri 10 | expect(uri.to_s).to eq('https://www.sec.gov/cgi-bin/browse-edgar') 11 | end 12 | 13 | it 'builds a default /browse-edgar/ url with options: {symbol: "AAPL"}' do 14 | uri = SecQuery::SecURI.browse_edgar_uri(symbol: 'AAPL') 15 | expect(uri.to_s) 16 | .to include('https://www.sec.gov/cgi-bin/browse-edgar?CIK=AAPL') 17 | end 18 | 19 | it 'builds a default /browse-edgar/ url with options: {cik: "AAPL"}' do 20 | uri = SecQuery::SecURI.browse_edgar_uri(cik: 'AAPL') 21 | expect(uri.to_s) 22 | .to include('https://www.sec.gov/cgi-bin/browse-edgar?CIK=AAPL') 23 | end 24 | 25 | it 'builds a default /browse-edgar/ url with options: "AAPL"' do 26 | uri = SecQuery::SecURI.browse_edgar_uri('AAPL') 27 | 
# encoding: UTF-8
# Sources of lib/sec_query/entity.rb, lib/sec_query/filing_detail.rb and
# lib/sec_query/sec_uri.rb.

# --- lib/sec_query/entity.rb ---
module SecQuery
  # => SecQuery::Entity
  # SecQuery::Entity is the root class which is responsible for requesting,
  # parsing and initializing SecQuery::Entity instances from SEC Edgar.
  class Entity
    # Attribute names. Each one is also the (string) key looked up in the hash
    # handed to #initialize.
    # NOTE(review): :assitant_director is used verbatim as a lookup key, so it
    # presumably mirrors the spelling of the feed element - confirm before
    # renaming it.
    COLUMNS = [:cik, :name, :mailing_address, :business_address,
               :assigned_sic, :assigned_sic_desc, :assigned_sic_href, :assitant_director, :cik_href,
               :formerly_names, :state_location, :state_location_href, :state_of_incorporation]
    attr_accessor(*COLUMNS)

    # Builds an entity from a string-keyed hash (the shape returned by .parse).
    # Keys that are missing simply leave the attribute nil.
    def initialize(entity)
      COLUMNS.each do |column|
        instance_variable_set("@#{ column }", entity[column.to_s])
      end
    end

    # Returns the first 80 filings for this entity's CIK.
    # +args+ are extra query parameters forwarded to Filing.find.
    def filings(args={})
      Filing.find(@cik, 0, 80, args)
    end

    # Performs a GET request and returns the response.
    # RestClient's default handling is used: successful responses are returned,
    # anything else raises the matching RestClient exception. (The previous
    # hand-written block called response.return! with the rest-client 1.x
    # argument list.)
    def self.query(url)
      RestClient.get(url)
    end

    # Looks up a single entity.
    # +entity_args+ is either a String (symbol, CIK or company name) or a Hash
    # accepted by SecURI.browse_edgar_uri.
    def self.find(entity_args)
      url = SecURI.browse_edgar_uri(entity_args)
      url[:action] = :getcompany
      response = query(url.output_atom.to_s)
      document = Nokogiri::HTML(response)
      Entity.new(parse(document.xpath("//feed/company-info")))
    end

    # Converts the <company-info> node(s) into the string-keyed hash expected
    # by #initialize. Returns {} when no company info is present.
    def self.parse(xml)
      parsed = Hash.from_xml(xml.to_s)
      info = parsed.is_a?(Hash) ? parsed['company_info'] : nil
      return {} unless info.is_a?(Hash) && !info.empty?

      info['name'] = info.delete('conformed_name')

      former = info['formerly_names']
      info['formerly_names'] = former['names'] if former.is_a?(Hash) && !former.empty?

      # A feed with one <address> yields a Hash, several yield an Array and
      # none yields nil; normalise all three to a list of hashes.
      container = info['addresses']
      addresses = container.is_a?(Hash) ? container['address'] : nil
      addresses = [addresses] if addresses.is_a?(Hash)
      Array(addresses).each do |address|
        next unless address.is_a?(Hash)
        info["#{address['type']}_address"] = address
      end

      info
    end
  end
end

# --- lib/sec_query/filing_detail.rb ---
module SecQuery
  # => SecQuery::FilingDetail
  # SecQuery::FilingDetail requests and parses Filing Detail for any given SecQuery::Filing
  class FilingDetail
    COLUMNS = [:link, :filing_date, :accepted_date, :period_of_report, :sec_access_number, :document_count, :format_files, :data_files]

    attr_accessor(*COLUMNS)

    # Builds a detail record from a symbol-keyed hash; missing keys stay nil.
    def initialize(filing_detail)
      COLUMNS.each do |column|
        instance_variable_set("@#{ column }", filing_detail[column])
      end
    end

    # Downloads a filing detail page and extracts its header fields and the
    # two file tables.
    # +uri+ is the page address; a leading "http:" is upgraded to "https:" for
    # the request, while the value stored in #link is +uri+ as given.
    def self.fetch(uri)
      url = uri.to_s.sub(/\Ahttp:/, 'https:')
      response = RestClient::Request.execute(method: :get, url: url, timeout: 10)
      document = Nokogiri::HTML(response.body)

      new(
        # was passed as :uri, which is not in COLUMNS, so #link stayed nil
        link: uri,
        filing_date: document.xpath('//*[@id="formDiv"]/div[2]/div[1]/div[2]').text,
        accepted_date: document.xpath('//*[@id="formDiv"]/div[2]/div[1]/div[4]').text,
        period_of_report: document.xpath('//*[@id="formDiv"]/div[2]/div[2]/div[2]').text,
        sec_access_number: document.xpath('//*[@id="secNum"]/text()').text.strip,
        document_count: document.xpath('//*[@id="formDiv"]/div[2]/div[1]/div[6]').text.to_i,
        format_files: parse_files(document.xpath("//table[@summary='Document Format Files']")),
        data_files: parse_files(document.xpath("//table[@summary='Data Files']"))
      )
    end

    # Turns a file table into an array of hashes keyed by column header.
    # A cell whose first child is a link becomes
    # { 'link' => absolute url, 'text' => cell text }; any other cell becomes
    # its text with leading whitespace removed. Rows without <td> cells (such
    # as the header row) are dropped.
    def self.parse_files(format_files_table)
      # Relative paths (".//") keep the search inside the given table; the
      # absolute "//th" / "//tr" forms matched every table in the document.
      headers = format_files_table.xpath('.//th').map(&:text)

      rows = format_files_table.xpath('.//tr').map do |row|
        row.xpath('td').each_with_index.each_with_object({}) do |(cell, column), parsed|
          text = cell.text.gsub(/\A\p{Space}*/, '')
          first_child = cell.children.first
          parsed[headers[column]] =
            if first_child && first_child.name == 'a'
              { 'link' => "https://www.sec.gov#{first_child['href']}", 'text' => text }
            else
              text
            end
        end
      end

      rows.reject(&:empty?)
    end
  end
end

# --- lib/sec_query/sec_uri.rb ---
# Date helpers used to build daily-index paths.
class Date
  # Calendar quarter (1..4) this date falls in.
  def quarter
    (month - 1) / 3 + 1
  end

  # Path fragment of the daily company index for this date: dates in the
  # current quarter of the current year use the bare file name, all others
  # are prefixed with "<year>/QTR<n>/".
  def to_sec_uri_format
    today = Date.today
    if today.quarter == quarter && today.year == year
      "company.#{ strftime("%Y%m%d") }.idx"
    else
      "#{ year }/QTR#{ quarter }/company.#{ strftime("%Y%m%d") }.idx"
    end
  end
end

module SecQuery
  # Builder for www.sec.gov URLs. Query parameters are collected in
  # +query_values+ (nil until the first parameter is set).
  class SecURI
    # Characters removed from company names before they are used in a query.
    NAME_PUNCTUATION = /[(,?!\''"":.)]/

    attr_accessor :host, :scheme, :path, :query_values

    def self.browse_edgar_uri(args = nil)
      build_with_path('/browse-edgar', args)
    end

    # URI of the daily company index for +date+.
    def self.for_date(date)
      instance = SecURI.new
      instance.path = "Archives/edgar/daily-index/#{ date.to_sec_uri_format }"
      instance
    end

    def self.ownership_display_uri(args = nil)
      build_with_path('/own-disp', args)
    end

    # Appends +path+ to the default cgi-bin path and converts +args+ into
    # query values.
    # +args+ may be nil (no query), a String or a Hash (subclasses included).
    # Raises ArgumentError for any other type.
    def self.build_with_path(path, args)
      instance = SecURI.new
      instance.path += path
      return instance if args.nil?

      instance.query_values =
        case args
        when Hash then handle_hash_args(args)
        when String then handle_string_args(args)
        else raise ArgumentError, "unsupported query argument: #{ args.class }"
        end
      instance
    end

    # A numeric string or a short (<= 5 characters) string is treated as a
    # CIK / ticker; anything else is treated as a company name.
    def self.handle_string_args(string_arg)
      if numeric_string?(string_arg) || string_arg.length <= 5
        { CIK: string_arg }
      else
        { company: string_arg.gsub(NAME_PUNCTUATION, '') }
      end
    end

    private_class_method :handle_string_args

    # True when Kernel#Float accepts +value+.
    def self.numeric_string?(value)
      Float(value)
      true
    rescue ArgumentError, TypeError
      false
    end

    private_class_method :numeric_string?

    # Returns a copy of +hash_arg+ with either :CIK (from :symbol / :cik) or
    # :company added. :company is set even when no name could be derived, so
    # the parameter is always part of the query. The caller's hash is left
    # untouched.
    def self.handle_hash_args(hash_arg)
      options = hash_arg.dup
      identifier = hash_arg[:symbol] || hash_arg[:cik]
      if identifier
        options[:CIK] = identifier
      else
        options[:company] = company_name_from_hash_args(hash_arg)
      end
      options
    end

    private_class_method :handle_hash_args

    # "<last> <first>" when both are given, otherwise the cleaned :name,
    # otherwise nil.
    def self.company_name_from_hash_args(args)
      return "#{ args[:last] } #{ args[:first] }" if args[:first] && args[:last]
      return args[:name].gsub(NAME_PUNCTUATION, '') if args[:name]
    end

    private_class_method :company_name_from_hash_args

    def initialize
      self.host = 'www.sec.gov'
      self.scheme = 'https'
      self.path = 'cgi-bin'
    end

    # Sets a single query parameter, creating the parameter hash on first use.
    def []=(key, value)
      self.query_values ||= {}
      query_values[key] = value
      self
    end

    # Requests Atom output; returns self so calls can be chained.
    def output_atom
      self.query_values ||= {}
      query_values.merge!(output: 'atom')
      self
    end

    def to_s
      uri.to_s
    end

    def to_str
      to_s
    end

    private

    def uri
      Addressable::URI.new(
        host: host,
        scheme: scheme,
        path: path,
        query_values: query_values
      )
    end
  end
end
| describe SecQuery::Entity do 7 | 8 | describe "Company Queries", vcr: { cassette_name: "aapl"} do 9 | 10 | let(:query){{name: "Apple Inc.", sic: "3571", symbol: "aapl", cik:"0000320193"}} 11 | 12 | [:symbol, :cik, :name].each do |key| 13 | context "when quering by #{key}" do 14 | describe "as hash" do 15 | 16 | let(:entity){ SecQuery::Entity.find({ key => query[key] }) } 17 | 18 | it "should be valid" do 19 | is_valid?(entity) 20 | end 21 | 22 | it "should have a valid mailing address" do 23 | is_valid_address?(entity.mailing_address) 24 | end 25 | 26 | it "should have a valid business address" do 27 | is_valid_address?(entity.business_address) 28 | end 29 | end 30 | 31 | describe "as string" do 32 | it "should be valid" do 33 | entity = SecQuery::Entity.find(query[key]) 34 | is_valid?(entity) 35 | end 36 | end 37 | end 38 | end 39 | end 40 | 41 | describe "Company Queries - Alphabet Inc.", vcr: { cassette_name: "googl"} do 42 | 43 | let(:query){{name: "Alphabet Inc.", sic: "7370", symbol: "googl", cik:"0001652044"}} 44 | 45 | [:symbol, :cik, :name].each do |key| 46 | context "when quering by #{key}" do 47 | describe "as hash" do 48 | 49 | let(:entity){ SecQuery::Entity.find({ key => query[key] }) } 50 | 51 | it "should be valid" do 52 | is_valid?(entity) 53 | end 54 | 55 | it "should have a valid mailing address" do 56 | is_valid_address?(entity.mailing_address) 57 | end 58 | 59 | it "should have a valid business address" do 60 | is_valid_address?(entity.business_address) 61 | end 62 | end 63 | 64 | describe "as string" do 65 | it "should be valid" do 66 | entity = SecQuery::Entity.find(query[key]) 67 | is_valid?(entity) 68 | end 69 | end 70 | end 71 | end 72 | end 73 | 74 | describe "People Queries", vcr: { cassette_name: "Steve Jobs"} do 75 | 76 | let(:query){ { name: "JOBS STEVEN P", :cik => "0001007844" } } 77 | 78 | [:cik, :name].each do |key| 79 | context "when quering by #{key}" do 80 | describe "as hash" do 81 | 82 | let(:entity){ SecQuery::Entity.find({ 
key => query[key] }) } 83 | 84 | it "should be valid" do 85 | is_valid?(entity) 86 | end 87 | 88 | it "should have a valid mailing address" do 89 | is_valid_address?(entity.mailing_address) 90 | end 91 | 92 | it "should have a valid business address" do 93 | is_valid_address?(entity.business_address) 94 | end 95 | end 96 | 97 | describe "as string" do 98 | it "should be valid" do 99 | entity = SecQuery::Entity.find(query[key]) 100 | is_valid?(entity) 101 | end 102 | end 103 | end 104 | end 105 | end 106 | end 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sec_query 2 | 3 | A ruby gem for searching and retrieving data from the Securities and Exchange Commission's Edgar web system. 4 | 5 | Look up an Entity - person or company - by Central Index Key (CIK), stock symbol, company name or person (by first and last name). 6 | 7 | Additionally retrieve some, or all, Relationships, Transactions and Filings as recorded by the SEC. 8 | 9 | ## Note: 9/13/16, SEC.GOV embraces SSL! 10 | 11 | On or before September 13th, 2016, SEC.gov updated its site to use SSL (Huzzah!). Version 1.2.0 addresses this change. All versions earlier than 1.2.0 will cease to function as expected. Update immediately. 12 | 13 | ## Installation 14 | 15 | To install the 'sec_query' Ruby Gem run the following command at the terminal prompt.
16 | 17 | `gem install sec_query` 18 | 19 | For an example of what type of information 'sec_query' can retrieve, run the following command: 20 | 21 | `bundle exec rspec spec` 22 | 23 | If running 'sec_query' from the command prompt in irb: 24 | 25 | `irb -rubygems` 26 | 27 | `require "sec_query"` 28 | 29 | ## Functionality 30 | 31 | ### Entity: 32 | 33 | A SecQuery::Entity instance contains the following attributes: 34 | 35 | * cik 36 | * name 37 | * mailing_address 38 | * business_address 39 | * assigned_sic 40 | * assigned_sic_desc 41 | * assigned_sic_href 42 | * assitant_director 43 | * cik_href 44 | * formerly_names 45 | * state_location 46 | * state_location_href 47 | * state_of_incorporation 48 | 49 | #### Class Methods 50 | 51 | ##### .find 52 | 53 | ###### By Stock Symbol: 54 | 55 | `SecQuery::Entity.find("aapl")` 56 | 57 | Or: 58 | 59 | `SecQuery::Entity.find({:symbol=> "aapl"})` 60 | 61 | ###### By Name: 62 | 63 | `SecQuery::Entity.find("Apple, Inc.")` 64 | 65 | Or: 66 | 67 | `SecQuery::Entity.find({:name=> "Apple, Inc."})` 68 | 69 | ###### Central Index Key, CIK 70 | 71 | `SecQuery::Entity.find( "0000320193")` 72 | 73 | Or: 74 | 75 | `SecQuery::Entity.find({:cik=> "0000320193"})` 76 | 77 | ###### By First, Middle and Last Name: 78 | 79 | By First, Middle and Last Name. 80 | 81 | `SecQuery::Entity.find({:first=> "Steve", :middle=> "P", :last=> "Jobs"})` 82 | 83 | Middle initial or name is optional, but helps when there are multiple results for First and Last Name.
84 | 85 | #### Instance Methods 86 | 87 | ##### .filings 88 | 89 | Returns a list of SecQuery::Filing instances for a SecQuery::Entity. 90 | 91 | ### SecQuery::Filing 92 | 93 | A SecQuery::Filing instance may contain the following attributes: 94 | 95 | * cik 96 | * title 97 | * summary 98 | * link 99 | * term 100 | * date 101 | * file_id 102 | * detail 103 | 104 | #### Class Methods 105 | 106 | ##### .recent 107 | 108 | Find recent filings: 109 | 110 | ``` 111 | filings = [] 112 | SecQuery::Filing.recent(start: 0, count: 10, limit: 10) do |filing| 113 | filings.push filing 114 | end 115 | ``` 116 | 117 | Requires a block. Returns the most recent filings. Use start, count and limit to iterate through recent filings. 118 | 119 | ### SecQuery::FilingDetail 120 | Represents the detail page for a given filing. 121 | Ex: [Filing Detail page](https://www.sec.gov/Archives/edgar/data/320193/000032019317000070/0000320193-17-000070-index.htm) of Apple's Annual Report from 2017 122 | 123 | #### Instance Methods 124 | * link 125 | * filing_date 126 | * accepted_date 127 | * period_of_report 128 | * sec_access_number 129 | * document_count 130 | * format_files 131 | * data_files 132 | 133 | #### Class Methods 134 | ##### .fetch 135 | ``` 136 | appl_10k_details_url = 'https://www.sec.gov/Archives/edgar/data/320193/000032019317000070/0000320193-17-000070-index.htm' 137 | filing_detail = SecQuery::FilingDetail.fetch(appl_10k_details_url) 138 | ``` 139 | 140 | ## To Whom It May Concern at the SEC 141 | 142 | Over the last decade, I have gotten to know Edgar quite extensively and I have grown quite fond of it and the information it contains. So it is with my utmost respect that I make the following suggestions: 143 | 144 | * Edgar is in dire need of a proper, published RESTful API. 145 | * Edgar needs to be able to return XML or JSON for any API query. 146 | * Edgar's search engine is atrocious; rigid to the point of being almost unusable.
147 | * Edgar only goes back as far as 1993, and in most cases, only provides extensive information after 2000. 148 | 149 | It is my humble opinion that these four issues are limiting the effectiveness of Edgar and the SEC in general. The information the SEC contains is vitally important to National Security and the stability of the American Economy and the World. It is time to make all information available and accessible. 150 | 151 | ## License 152 | 153 | Copyright (c) 2011 Ty Rauber 154 | 155 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 156 | 157 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 158 | 159 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
160 | -------------------------------------------------------------------------------- /spec/sec_query/filing_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | include SecQuery 3 | require 'spec_helper' 4 | 5 | describe SecQuery::Filing do 6 | it '::uri_for_recent' do 7 | expect(SecQuery::Filing.uri_for_recent.to_s) 8 | .to eq('https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&company&count=100&output=atom&owner=include&start=0') 9 | end 10 | 11 | it '::uri_for_cik' do 12 | expect(SecQuery::Filing.uri_for_cik('testing').to_s) 13 | .to eq('https://www.sec.gov/cgi-bin/browse-edgar?CIK=testing&action=getcompany&company&count=100&output=atom&owner=include&start=0') 14 | end 15 | 16 | describe '::for_date', vcr: { cassette_name: 'idx' } do 17 | let(:index) { SecQuery::Filing.for_date(Date.parse('20161230')) } 18 | 19 | it 'parses all of the filings' do 20 | expect(index.count).to eq(2554) 21 | end 22 | 23 | it 'correctly parses out the link' do 24 | expect(index.first.link) 25 | .to match(/https:\/\/www.sec.gov\/Archives\/edgar\/data\//) 26 | end 27 | 28 | it 'correctly parses out the cik' do 29 | expect(index.first.cik).to eq('1605941') 30 | end 31 | 32 | it 'correctly parses out the term' do 33 | expect(index.first.term).to eq('N-CSR') 34 | end 35 | end 36 | 37 | describe '::recent', vcr: { cassette_name: 'recent' } do 38 | let(:filings) { [] } 39 | 40 | before(:each) do 41 | SecQuery::Filing.recent(start: 0, count: 10, limit: 10) do |filing| 42 | filings.push filing 43 | end 44 | end 45 | 46 | it 'should accept options' do 47 | expect(filings.count).to eq(10) 48 | end 49 | 50 | it 'should have filing attributes', vcr: { cassette_name: 'recent' } do 51 | filings.each do |filing| 52 | expect(filing.cik).to be_present 53 | expect(filing.title).to be_present 54 | expect(filing.summary).to be_present 55 | expect(filing.link).to be_present 56 | expect(filing.term).to be_present 57 | 
expect(filing.date).to be_present 58 | expect(filing.file_id).to be_present 59 | end 60 | end 61 | end 62 | 63 | describe "::find" do 64 | shared_examples_for "it found filings" do 65 | it "should return an array of filings" do 66 | expect(filings).to be_kind_of(Array) 67 | end 68 | 69 | it "the filings should be valid" do 70 | is_valid_filing?(filings.first) 71 | end 72 | end 73 | 74 | let(:cik){"0000320193"} 75 | 76 | context "when querying by cik" do 77 | let(:filings){ SecQuery::Filing.find(cik) } 78 | 79 | describe "Filings", vcr: { cassette_name: "Steve Jobs"} do 80 | it_behaves_like "it found filings" 81 | end 82 | end 83 | 84 | context "when querying cik and by type param" do 85 | let(:filings){ SecQuery::Filing.find(cik, 0, 40, { type: "10-K" }) } 86 | 87 | describe "Filings", vcr: { cassette_name: "Steve Jobs"} do 88 | it_behaves_like "it found filings" 89 | 90 | it "should only return filings of type" do 91 | expect(filings.first.term).to eq "10-K" 92 | end 93 | end 94 | end 95 | 96 | describe '#content', vcr: { cassette_name: 'content' } do 97 | let(:index) { SecQuery::Filing.for_date(Date.parse('20161230')) } 98 | 99 | it 'returns content of the filing by requesting the link' do 100 | f = Filing.new(link: index.first.link) 101 | expect(f.content).to match(/^()/) 102 | end 103 | end 104 | 105 | describe "::last", vcr: { cassette_name: "Steve Jobs"} do 106 | let(:cik) { "0000320193" } 107 | 108 | context 'when querying by cik' do 109 | let(:filing) { SecQuery::Filing.last(cik) } 110 | 111 | it 'returns the first filing' do 112 | expect(filing).to be_kind_of(SecQuery::Filing) 113 | is_valid_filing?(filing) 114 | end 115 | end 116 | 117 | context 'when querying cik and by type param' do 118 | let(:filing) { SecQuery::Filing.last(cik,{ type: "10-K" }) } 119 | 120 | describe "Filings", vcr: { cassette_name: "Steve Jobs"} do 121 | it "should return filing of type 10-K" do 122 | expect(filing).to be_kind_of(SecQuery::Filing) 123 | expect(filing.term).to eq 
# encoding: UTF-8

module SecQuery
  # => SecQuery::Filing
  # SecQuery::Filing requests and parses filings for any given SecQuery::Entity
  class Filing
    COLUMNS = [:cik, :title, :summary, :link, :term, :date, :file_id]

    attr_accessor(*COLUMNS)

    # Builds a filing from a symbol-keyed hash; missing keys stay nil.
    def initialize(filing)
      COLUMNS.each do |column|
        instance_variable_set("@#{ column }", filing[column])
      end
    end

    # Lazily fetches (and memoizes) the FilingDetail for this filing's link.
    def detail
      @detail ||= FilingDetail.fetch(@link)
    end

    # GETs +uri+ and yields one Filing per feed entry to the given block.
    def self.fetch(uri, &blk)
      RestClient::Request.execute(method: :get, url: uri.to_s, timeout: 10) do |response, _request, _result|
        parse_rss(response.body, &blk)
      end
    end

    # Yields recent filings page by page.
    # Options: :start (default 0), :count per page (default 100) and :limit,
    # the offset at which paging stops (default 100).
    def self.recent(options = {}, &blk)
      start = options.fetch(:start, 0)
      count = options.fetch(:count, 100)
      limit = options.fetch(:limit, 100)
      limited_count = [limit - start, count].min
      fetch(uri_for_recent(start, limited_count), &blk)
      start += count
      return if start >= limit
      recent({ start: start, count: count, limit: limit }, &blk)
    rescue OpenURI::HTTPError
      # NOTE(review): requests in this class go through RestClient, which does
      # not raise OpenURI::HTTPError - confirm which error class was meant.
      return
    end

    # Same paging scheme as .recent, restricted to one CIK.
    def self.for_cik(cik, options = {}, &blk)
      start = options.fetch(:start, 0)
      count = options.fetch(:count, 100)
      limit = options.fetch(:limit, 100)
      fetch(uri_for_cik(cik, start, count), &blk)
      start += count
      return if start >= limit
      for_cik(cik, { start: start, count: count, limit: limit }, &blk)
    rescue OpenURI::HTTPError
      # NOTE(review): see .recent - this rescue looks unreachable with RestClient.
      return
    end

    # Downloads the daily company index for +date+ and passes every parsed
    # filing to the block (without a block the enumerator from Array#each is
    # returned).
    def self.for_date(date, &blk)
      url = SecURI.for_date(date).to_s
      RestClient::Request.execute(method: :get, url: url, timeout: 10) do |response, _request, _result|
        filings_for_index(response.body).each(&blk)
      end
    end

    # Parses the text of a daily index. Everything up to and including the
    # dashed separator line is skipped; rows that cannot be parsed are dropped.
    def self.filings_for_index(index)
      [].tap do |filings|
        content_section = false
        index.each_line do |row|
          content_section = true if row.include?('-------------')
          next if !content_section || row.include?('------------')
          filing = filing_for_index_row(row)
          filings << filing unless filing.nil?
        end
      end
    end

    # Parses one index row (company name, form type, CIK, filing date, file
    # name) into a Filing, or returns nil when the row is not a filing row.
    #
    # Columns are separated by runs of spaces while company names contain
    # single spaces, so the row is split on wide gaps first and on narrower
    # gaps only when that leaves two columns fused together.
    # NOTE(review): both splits previously used the same single-space pattern,
    # which made the fallback a no-op and broke multi-word company names. The
    # widths used here (3+, then 2+) are a reconstruction - confirm against a
    # real index file.
    def self.filing_for_index_row(row)
      fields = split_index_row(row, / {3,}/)
      fields = split_index_row(row, / {2,}/) if fields.count == 4
      return nil if fields.count < 5

      # A state suffix such as "/DE/" can be split off the company name.
      fields.delete_at(1) if fields[1].start_with?('/')
      _company, term, cik, filed_on, link = fields
      return nil unless link && filed_on =~ /\d{8}/

      date = begin
        Date.parse(filed_on)
      rescue ArgumentError
        nil
      end
      return nil if date.nil?

      link = "https://www.sec.gov/Archives/#{ link }" unless link[0..3] == 'http'
      Filing.new(term: term, cik: cik, date: date, link: link)
    end

    # Splits +row+ on +delimiter+, trims each field and drops empty ones.
    def self.split_index_row(row, delimiter)
      row.split(delimiter).map(&:strip).reject(&:empty?)
    end

    private_class_method :split_index_row

    def self.uri_for_recent(start = 0, count = 100)
      SecURI.browse_edgar_uri(
        action: :getcurrent,
        owner: :include,
        output: :atom,
        start: start,
        count: count
      )
    end

    def self.uri_for_cik(cik, start = 0, count = 100)
      SecURI.browse_edgar_uri(
        action: :getcompany,
        owner: :include,
        output: :atom,
        start: start,
        count: count,
        CIK: cik
      )
    end

    # Parses an Atom feed and calls the block with one Filing per entry.
    # The CIK is the ten-character group in parentheses in the entry title;
    # it is nil when the title has no such group.
    def self.parse_rss(rss, &blk)
      feed = RSS::Parser.parse(rss, false)
      feed.entries.each do |entry|
        title = entry.title.content
        filing = Filing.new(
          cik: title[/\((\w{10})\)/, 1],
          file_id: entry.id.content.split('=').last,
          term: entry.category.term,
          title: title,
          summary: entry.summary.content,
          date: DateTime.parse(entry.updated.content.to_s),
          link: entry.link.href.gsub('-index.htm', '.txt')
        )
        blk.call(filing)
      end
    end

    # Returns an array of filings for +cik+.
    # +start+ / +count+ page through the results; +args+ are additional query
    # parameters (for example type: "10-K").
    def self.find(cik, start = 0, count = 80, args={})
      url = SecURI.browse_edgar_uri({cik: cik})
      url[:action] = :getcompany
      url[:start] = start
      url[:count] = count
      args.each { |key, value| url[key] = value }

      response = Entity.query(url.output_atom.to_s)
      document = Nokogiri::HTML(response)
      parse(cik, document)
    end

    # Returns the first filing reported for +cik+ (optionally filtered by
    # +args+), or nil when there is none.
    def self.last(cik, args = {})
      filings = find(cik, 0, 1, args)
      filings.is_a?(Array) ? filings.first : nil
    end

    # Converts every <content> element of the company feed into a Filing.
    def self.parse(cik, document)
      filings = []
      entries = document.xpath('//content')
      return filings unless entries.to_s.length > 0

      entries.each do |entry|
        next unless entry.xpath('//content/accession-number').to_s.length > 0
        content = Hash.from_xml(entry.to_s)['content']
        next if content.nil?
        content[:cik] = cik
        # key was misspelled 'accession_nunber', so file_id was always nil
        content[:file_id] = content.delete('accession_number')
        content[:date] = content.delete('filing_date')
        content[:link] = content.delete('filing_href')
        content[:term] = content.delete('filing_type')
        content[:title] = content.delete('form_name')
        filings << Filing.new(content)
      end
      filings
    end

    # Downloads (and memoizes) the document behind #link.
    # On a 404 a message is printed; the error is then handed to the given
    # block as (error, filing) or re-raised when no block was given.
    def content(&error_blk)
      @content ||= RestClient.get(self.link)
    rescue RestClient::ResourceNotFound => e
      puts "404 Resource Not Found: Bad link #{ self.link }"
      if block_given?
        error_blk.call(e, self)
      else
        raise e
      end
    end
  end
end