├── lib
├── wwmd
│ ├── page.rb
│ ├── class_extensions.rb
│ ├── viewstate
│ │ ├── vs_stubs
│ │ │ ├── vs_indexed_string.rb
│ │ │ ├── vs_type.rb
│ │ │ ├── vs_indexed_string_ref.rb
│ │ │ ├── vs_read_types.rb
│ │ │ ├── vs_pair.rb
│ │ │ ├── vs_int_enum.rb
│ │ │ ├── vs_triplet.rb
│ │ │ ├── vs_binary_serialized.rb
│ │ │ ├── vs_value.rb
│ │ │ ├── vs_unit.rb
│ │ │ ├── vs_list.rb
│ │ │ ├── vs_string_formatted.rb
│ │ │ ├── vs_string.rb
│ │ │ ├── vs_array.rb
│ │ │ ├── vs_string_array.rb
│ │ │ ├── vs_stub_helpers.rb
│ │ │ ├── vs_hashtable.rb
│ │ │ ├── vs_hybrid_dict.rb
│ │ │ ├── vs_read_value.rb
│ │ │ └── vs_sparse_array.rb
│ │ ├── vs_stubs.rb
│ │ ├── viewstate_yaml.rb
│ │ ├── viewstate.rb
│ │ ├── viewstate_types.rb
│ │ ├── viewstate_from_xml.rb
│ │ ├── viewstate_utils.rb
│ │ └── viewstate_deserializer_methods.rb
│ ├── wwmd_puts.rb
│ ├── class_extensions
│ │ ├── extensions_external.rb
│ │ ├── extensions_nilclass.rb
│ │ ├── mixins_string_encoding.rb
│ │ ├── extensions_encoding.rb
│ │ ├── extensions_rbkb.rb
│ │ └── extensions_base.rb
│ ├── page
│ │ ├── auth.rb
│ │ ├── helpers.rb
│ │ ├── inputs.rb
│ │ ├── html2text_nokogiri.rb
│ │ ├── constants.rb
│ │ ├── reporting_helpers.rb
│ │ ├── html2text_hpricot.rb
│ │ ├── form.rb
│ │ ├── parsing_convenience.rb
│ │ ├── headers.rb
│ │ ├── irb_helpers.rb
│ │ ├── spider.rb
│ │ ├── scrape.rb
│ │ ├── page.rb
│ │ └── form_array.rb
│ ├── viewstate.rb
│ ├── wwmd_utils.rb
│ ├── wwmd_config.rb
│ ├── urlparse.rb
│ └── guid.rb
└── wwmd.rb
├── spec
├── README
├── spider_csrf_test.spec
├── form_array.spec
└── urlparse_test.spec
├── tasks
├── bones.rake
├── notes.rake
├── zentest.rake
├── test.rake
├── git.rake
├── post_load.rake
├── rdoc.rake
├── spec.rake
├── rubyforge.rake
├── ann.rake
├── gem.rake
└── setup.rb
├── examples
├── config_example.yaml
└── wwmd_example.rb
├── Rakefile
├── History.txt
├── README.rdoc
└── wwmd.gemspec
/lib/wwmd/page.rb:
--------------------------------------------------------------------------------
1 | require 'wwmd/wwmd_utils'
2 | require 'wwmd/wwmd_config'
3 | require 'wwmd/page/page'
4 |
--------------------------------------------------------------------------------
/spec/README:
--------------------------------------------------------------------------------
1 | None of this was developed with unit tests
2 |
3 | These are tests written against new functionality
4 |
--------------------------------------------------------------------------------
/lib/wwmd/class_extensions.rb:
--------------------------------------------------------------------------------
# Require every Ruby file found in the class_extensions/ directory next to
# this file. The glob result is sorted so the load order is the same on every
# platform and Ruby version instead of depending on directory listing order.
Dir.glob(::File.join(::File.dirname(__FILE__), "class_extensions/", "*.rb")).sort.each { |rb| require rb }
2 |
3 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_indexed_string.rb:
--------------------------------------------------------------------------------
module WWMD
  # Indexed-string stub. It adds nothing of its own: both serialization and
  # XML generation are handed straight to VSStubs::VSString.
  class VSStubs::VSIndexedString < VSStubs::VSString
    # Returns whatever the inherited serialize returns.
    def serialize
      super
    end

    # Returns whatever the inherited to_xml returns.
    def to_xml
      super
    end
  end
end
7 |
--------------------------------------------------------------------------------
/lib/wwmd/wwmd_puts.rb:
--------------------------------------------------------------------------------
module WWMD
  # Instance-level accessors, kept so objects that include WWMD still respond
  # to console/debug as before.
  attr_accessor :console
  attr_accessor :debug

  # Module-level accessors. putd/putx/putw ask WWMD itself for these flags, so
  # the readers have to exist on the module; without them the calls only
  # resolve when WWMD happens to be included into Object.
  class << self
    attr_accessor :console
    attr_accessor :debug
  end

  # Both flags start out switched off.
  @console = false
  @debug = false

  # puts the arguments only when WWMD.debug is truthy.
  def putd(*args)
    puts(*args) if WWMD.debug
  end

  # puts the arguments only when WWMD.console is truthy.
  def putx(*args)
    puts(*args) if WWMD.console
  end

  # puts the arguments only when WWMD.console is truthy.
  def putw(*args)
    puts(*args) if WWMD.console
  end
end
10 |
--------------------------------------------------------------------------------
/lib/wwmd/class_extensions/extensions_external.rb:
--------------------------------------------------------------------------------
module REXML
  class Element

    # Serialize this element by calling write with an indent of 0.
    #
    # fn - optional filename. When given, the serialized text is written to
    #      that file and fn is returned; otherwise the text itself is
    #      returned.
    #
    # NOTE(review): defining pp here takes precedence over Kernel#pp for
    # element objects - confirm that is intended.
    def pp(fn = nil)
      tmp = String.new
      self.write(tmp, 0)
      return tmp unless fn

      # Write through File directly so this does not depend on a String#write
      # extension having been loaded.
      File.open(fn, "w") { |f| f.write(tmp) }
      fn
    end

  end
end
19 |
--------------------------------------------------------------------------------
/lib/wwmd/page/auth.rb:
--------------------------------------------------------------------------------
module WWMD
  class Page

    # Does this response carry an authenticate header?
    #
    # Returns true only when code is 401 and at least one entry of
    # header_data has a first element matching /www-authenticate/i;
    # false otherwise.
    def auth?
      return false if self.code != 401
      # any? stops at the first matching header instead of counting them all
      self.header_data.any? { |field| field[0] =~ /www-authenticate/i }
    end

  end
end
18 |
--------------------------------------------------------------------------------
/lib/wwmd/class_extensions/extensions_nilclass.rb:
--------------------------------------------------------------------------------
1 | # I really hate this
class NilClass#:nodoc:
  # nil reports itself as empty
  def empty?
    true
  end

  # nil has size zero
  def size
    0
  end

  # nil converts to a FormArray built from an empty array
  def to_form
    FormArray.new([])
  end

  # the remaining methods accept the call and answer nil (or an empty array
  # for grep) so chained lookups on a missing value do not raise
  def clop
    nil
  end

  def inner_html
    nil
  end

  def get_attribute(*args)
    nil
  end

  def grep(*args)
    []
  end

  def escape(*args)
    nil
  end
end
12 |
--------------------------------------------------------------------------------
/tasks/bones.rake:
--------------------------------------------------------------------------------
1 |
if HAVE_BONES

  namespace :bones do

    desc 'Show the PROJ open struct'
    task :debug do |t|
      # When exactly two top-level tasks were given, the second one is
      # removed from the list and used as the attribute to show.
      requested = t.application.top_level_tasks
      atr = requested.length == 2 ? requested.pop : nil

      if atr
        Bones::Debug.show_attr(PROJ, atr)
      else
        Bones::Debug.show PROJ
      end
    end

  end  # namespace :bones

end  # HAVE_BONES
19 |
20 | # EOF
21 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_type.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub that carries a type reference/value pair.
  class VSStubs::VSType
    include VSStubHelpers

    attr_accessor :value
    attr_reader :typeref, :typeval

    # typeref, typeval - stored as given and exposed through the readers.
    def initialize(typeref, typeval)
      @typeref, @typeval = typeref, typeval
    end

    # The included serialize already produces everything this stub needs
    # (original author's note: opcode + typeref + typeval).
    def serialize
      super
    end

    # The included to_xml is used unchanged.
    def to_xml
      super
    end

  end
end
24 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_indexed_string_ref.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub holding a reference value for an indexed string.
  class VSStubs::VSIndexedStringRef
    include VSStubHelpers

    attr_reader :value

    # ref - stored as value.
    def initialize(ref)
      @value = ref
    end

    # Header from the included serialize, followed by the reference encoded
    # with write_int.
    def serialize
      buf = super
      buf << self.write_int(@value)
      buf
    end

    # Element from the included to_xml with the reference as its text.
    def to_xml
      node = super
      node.add_text(self.value.to_s)
      node
    end

  end
end
25 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate.rb:
--------------------------------------------------------------------------------
1 | require 'rubygems'
2 | require 'nokogiri'
3 | require 'rexml/document'
4 | require 'htmlentities'
5 | require 'wwmd/viewstate/viewstate_utils'
module WWMD
  # Declared here, between the requires, with the ViewStateUtils helpers
  # mixed in; the files required afterwards add the rest of the class.
  class ViewState
    include ViewStateUtils
  end
end
11 | require 'wwmd/class_extensions'
12 | require 'wwmd/viewstate/viewstate'
13 | require 'wwmd/viewstate/viewstate_types'
14 | require 'wwmd/viewstate/viewstate_yaml'
15 | require 'wwmd/viewstate/viewstate_deserializer_methods'
16 | require 'wwmd/viewstate/viewstate_from_xml'
17 | require 'wwmd/viewstate/vs_stubs'
18 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_read_types.rb:
--------------------------------------------------------------------------------
module WWMD
  # One stub class per value type. None of them adds behaviour; everything
  # is inherited from VSStubs::VSReadValue.
  class VSStubs::VSInt16 < VSStubs::VSReadValue
  end

  class VSStubs::VSInt32 < VSStubs::VSReadValue
  end

  class VSStubs::VSByte < VSStubs::VSReadValue
  end

  class VSStubs::VSChar < VSStubs::VSReadValue
  end

  class VSStubs::VSDateTime < VSStubs::VSReadValue
  end

  class VSStubs::VSDouble < VSStubs::VSReadValue
  end

  class VSStubs::VSSingle < VSStubs::VSReadValue
  end

  class VSStubs::VSColor < VSStubs::VSReadValue
  end

  class VSStubs::VSKnownColor < VSStubs::VSReadValue
  end
end
12 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_pair.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub holding exactly two child objects.
  class VSStubs::VSPair
    include VSStubHelpers

    attr_accessor :value

    # obj1, obj2 - the two members, kept in that order in value.
    def initialize(obj1, obj2)
      @value = [obj1, obj2]
    end

    # Header from the included serialize followed by each member's own
    # serialization, in order.
    def serialize
      buf = super
      self.value.inject(buf) { |acc, member| acc << member.serialize }
    end

    # Element from the included to_xml with one child element per member.
    def to_xml
      node = super
      self.value.each { |member| node.add_element(member.to_xml) }
      node
    end
  end
end
30 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_int_enum.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub for an enum: a type reference/value pair plus an index.
  class VSStubs::VSIntEnum
    include VSStubHelpers

    attr_accessor :value
    attr_reader :typeref, :typeval

    # typeref, typeval - stored as given.
    # index            - stored as value.
    def initialize(typeref, typeval, index)
      @typeref, @typeval = typeref, typeval
      @value = index
    end

    # Header from the included serialize followed by the index written as a
    # 7-bit encoded int.
    def serialize
      buf = super
      buf << self.write_7bit_encoded_int(self.value)
    end

    # Element from the included to_xml with the index as its text.
    def to_xml
      node = super
      node.add_text(self.value.to_s)
      node
    end

  end
end
28 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_triplet.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub holding exactly three child objects.
  class VSStubs::VSTriplet
    include VSStubHelpers

    attr_accessor :value

    # obj1, obj2, obj3 - the three members, kept in that order in value.
    def initialize(obj1, obj2, obj3)
      @value = [obj1, obj2, obj3]
    end

    # Header from the included serialize followed by each member's own
    # serialization, in order.
    def serialize
      buf = super
      self.value.inject(buf) { |acc, member| acc << member.serialize }
    end

    # Element from the included to_xml with one child element per member.
    def to_xml
      node = super
      self.value.each { |member| node.add_element(member.to_xml) }
      node
    end

  end
end
32 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_binary_serialized.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub wrapping a string that is emitted base64-encoded in XML.
  class VSStubs::VSBinarySerialized
    include VSStubHelpers

    attr_accessor :value

    # Starts out with an empty string as value.
    def initialize
      @value = ''
    end

    # Replace the held string.
    def set(str)
      @value = str
    end

    # Header from the included serialize, then the size as a 7-bit encoded
    # int, then the string itself.
    def serialize
      buf = super
      buf << self.write_7bit_encoded_int(self.size)
      buf << self.value
      buf
    end

    # Element from the included to_xml, marked encoding="base64", with the
    # b64e-encoded value as its text.
    def to_xml
      node = super
      node.add_attribute("encoding","base64")
      node.add_text(self.value.b64e)
      node
    end

  end
end
31 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_value.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub for the single-byte values; the stored value is its own opcode.
  class VSStubs::VSValue
    include VSStubHelpers

    attr_accessor :value

    # gin up all the single byte values
    def initialize(str)
      @value = str
    end

    # The stored value run through hexify.
    def to_s
      @value.hexify
    end

    # Symbol form of the VIEWSTATE_TYPES entry for this opcode.
    def to_sym
      VIEWSTATE_TYPES[opcode].to_sym
    end

    # The opcode is the stored value.
    def opcode
      @value
    end

    # The included serialize is enough here (original author's note: it
    # just returns the opcode).
    def serialize
      super
    end

    # Element from the included to_xml with the symbol name as its text.
    def to_xml
      node = super
      node.add_text(self.to_sym.to_s)
      node
    end

  end
end
36 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_unit.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub holding a dword/word pair.
  class VSStubs::VSUnit
    include VSStubHelpers

    attr_reader :dword, :word, :value

    # dword, word - stored as given; value is set to an empty string.
    def initialize(dword, word)
      @dword, @word = dword, word
      @value = ''
    end

    # Header from the included serialize, then dword via write_double and
    # word via write_single.
    def serialize
      buf = super
      buf << write_double(self.dword)
      buf << write_single(self.word)
      buf
    end

    # Element from the included to_xml with dword and word as attributes.
    def to_xml
      node = super
      node.add_attribute("dword",self.dword.to_s)
      node.add_attribute("word",self.word.to_s)
      node
    end

  end
end
31 |
--------------------------------------------------------------------------------
/examples/config_example.yaml:
--------------------------------------------------------------------------------
1 | #---
2 | :base_url: "http://www.example.com"
3 | :header_file: "./HEADERS.default" # argv
4 | :username: "username" # argv
5 | :password: "password" # argv
6 |
7 | # opts for spider (only spider local urls)
8 | :spider_local_only: true
9 |
10 | # opts for curl object
11 | # set max_redirects and follow_location (follows 302s)
12 | :follow_location: true
13 | :max_redirects: 20
14 |
15 | # --use_proxy=host:port overrides both of these settings
16 | # to use a proxy (I use burp and so should you)
17 | :use_proxy: false
18 | :proxy_url: "localhost:8080"
19 |
20 | # cookies (where are we going to save our cookies?)
21 | :enable_cookies: true
22 | :cookiejar: "./__cookiejar"
23 |
24 | #+++
25 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_list.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub holding an ordered list of child objects.
  class VSStubs::VSList
    include VSStubHelpers

    attr_accessor :value

    # Starts out empty.
    def initialize
      @value = []
    end

    # Append obj to the list.
    def add(obj)
      @value << obj
    end

    # Header from the included serialize, the element count as a 7-bit
    # encoded int, then every element's own serialization.
    def serialize
      buf = super
      buf << self.write_7bit_encoded_int(self.size)
      self.value.inject(buf) { |acc, item| acc << item.serialize }
    end

    # Element from the included to_xml with a size attribute and one child
    # element per list entry.
    def to_xml
      node = super
      node.add_attribute("size",self.value.size.to_s)
      self.value.each { |item| node.add_element(item.to_xml) }
      node
    end

  end
end
35 |
--------------------------------------------------------------------------------
/lib/wwmd/wwmd_utils.rb:
--------------------------------------------------------------------------------
module WWMDUtils

  # Read a tab-separated header file into a Hash.
  #
  # Each line is split on its first tab into a name and a value; a line
  # without a tab maps its whole text to nil. Blank lines are skipped so
  # they no longer add a nil => nil entry.
  #
  # filename - path of the file to read.
  #
  # Returns the Hash of name => value.
  def self.header_array_from_file(filename)
    ret = Hash.new
    File.readlines(filename).each do |line|
      name, value = line.chomp.split(/\t/, 2)
      next if name.nil?
      ret[name] = value
    end
    return ret
  end

  # Random string of lowercase letters.
  #
  # len    - number of characters (default 8).
  # digits - when true, "0".."9" are part of the alphabet as well.
  def self.ranstr(len=8,digits=false)
    chars = ("a".."z").to_a
    chars += ("0".."9").to_a if digits
    random_chars(chars, len)
  end

  # Random string of decimal digits.
  #
  # len - number of characters (default 8).
  # hex - when true, "A".."F" are part of the alphabet as well.
  def self.rannum(len=8,hex=false)
    chars = ("0".."9").to_a
    chars += ("A".."F").to_a if hex
    random_chars(chars, len)
  end

  # Build a string of len characters drawn from chars.
  def self.random_chars(chars, len)
    ret = String.new
    # rand(n) yields 0...n, so passing chars.size covers every index; the
    # previous chars.size-1 could never select the last character.
    1.upto(len) { ret << chars[rand(chars.size)] }
    ret
  end
  private_class_method :random_chars

end
29 |
--------------------------------------------------------------------------------
/tasks/notes.rake:
--------------------------------------------------------------------------------
1 |
if HAVE_BONES

  # Any task names given after the first are removed from the top-level task
  # list and joined with spaces; nil when there are none.
  extra_task_words = lambda do |t|
    pending = t.application.top_level_tasks
    pending.slice!(1..-1).join(' ') if pending.length > 1
  end

  desc "Enumerate all annotations"
  task :notes do |t|
    id = extra_task_words.call(t)
    Bones::AnnotationExtractor.enumerate(
        PROJ, PROJ.notes.tags.join('|'), id, :tag => true)
  end

  namespace :notes do
    # one task per configured tag, named after the downcased tag
    PROJ.notes.tags.each do |tag|
      desc "Enumerate all #{tag} annotations"
      task tag.downcase.to_sym do |t|
        id = extra_task_words.call(t)
        Bones::AnnotationExtractor.enumerate(PROJ, tag, id)
      end
    end
  end

end  # if HAVE_BONES
26 |
27 | # EOF
28 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_string_formatted.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub for a string that carries a type reference/value pair.
  class VSStubs::VSStringFormatted
    include VSStubHelpers

    attr_accessor :value
    attr_reader :typeref, :typeval

    # typeref, typeval - stored as given.
    # str              - stored as value.
    def initialize(typeref, typeval, str)
      @typeref, @typeval = typeref, typeval
      @value = str
    end

    # Header from the included serialize, the size as a 7-bit encoded int,
    # then the string itself.
    def serialize
      buf = super
      buf << self.write_7bit_encoded_int(self.size)
      buf << self.value
    end

    # Element from the included to_xml wrapping the string rendered as a
    # VSString element.
    def to_xml
      node = super
      node.add_element(VSStubs::VSString.new(self.value).to_xml)
      node
    end

    # Not implemented yet.
    def from_xml
      # deserialize convenience VSString properly
    end

  end
end
33 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_string.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub for a plain string.
  class VSStubs::VSString
    include VSStubHelpers

    attr_accessor :value

    # val - stored as value.
    def initialize(val)
      @value = val
    end

    # Header from the included serialize, the size as a 7-bit encoded int,
    # then the string itself.
    def serialize
      buf = super
      buf << self.write_7bit_encoded_int(self.size)
      buf << self.value
      buf
    end

    # Element from the included to_xml with the string as its text. A value
    # containing anything outside \x20-\x7e is escaped and the element is
    # marked encoding="urlencoded"; otherwise the value is added verbatim.
    # (A quoted-printable variant was tried and left disabled by the
    # original author.)
    def to_xml
      node = super
      text = self.value
      unless text =~ /[^\x20-\x7e]/
        node.add_text(text)
        return node
      end
      node.add_attribute("encoding","urlencoded")
      node.add_text(text.escape(/[^\x20-\x7e]/))
      node
    end

  end
end
34 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | # Look in the tasks/setup.rb file for the various options that can be
2 | # configured in this Rakefile. The .rake files in the tasks directory
3 | # are where the options are used.
4 |
# Use the installed bones gem when it loads; otherwise fall back to the
# bundled tasks/setup.rb, and fail with an install hint if that is missing.
begin
  require 'bones'
  Bones.setup
rescue LoadError
  begin
    load 'tasks/setup.rb'
  rescue LoadError
    raise RuntimeError, '### please install the "bones" gem ###'
  end
end

ensure_in_path 'lib'
require 'wwmd'

task :default => 'spec:run'

# project metadata
PROJ.name    = 'wwmd'
PROJ.authors = 'Michael L. Tracy'
PROJ.email   = 'mtracy@matasano.com'
PROJ.url     = 'http://github.com/miketracy/wwmd/tree/master'
PROJ.version = WWMD::VERSION
#PROJ.rubyforge.name = 'wwmd'

PROJ.spec.opts << '--color'

# gem dependencies, declared in this order
['ruby-debug', 'curb', 'nokogiri'].each { |gem_name| depend_on gem_name }
33 |
34 | # EOF
35 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_array.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub for an array that carries a type reference/value pair.
  class VSStubs::VSArray
    include VSStubHelpers

    attr_accessor :value
    attr_reader :typeref, :typeval

    # typeref, typeval - stored as given; the element list starts empty.
    def initialize(typeref, typeval)
      @typeref, @typeval = typeref, typeval
      @value = []
    end

    # Append obj to the array.
    def add(obj)
      @value << obj
    end

    # Header from the included serialize, the element count as a 7-bit
    # encoded int, then every element's own serialization.
    def serialize
      buf = super
      buf << self.write_7bit_encoded_int(self.value.size)
      self.value.inject(buf) { |acc, item| acc << item.serialize }
    end

    # Element from the included to_xml with a size attribute and one child
    # element per entry.
    def to_xml
      node = super
      node.add_attribute("size", self.value.size.to_s)
      self.value.each { |item| node.add_element(item.to_xml) }
      node
    end

  end
end
39 |
--------------------------------------------------------------------------------
/tasks/zentest.rake:
--------------------------------------------------------------------------------
if HAVE_ZENTEST

  # --------------------------------------------------------------------------
  # autotest loop over the unit tests, defined only when the configured test
  # file exists or the test file list is non-empty
  if test(?e, PROJ.test.file) || !PROJ.test.files.to_a.empty?
    require 'autotest'

    namespace :test do
      task(:autotest) { Autotest.run }
    end

    desc "Run the autotest loop"
    task :autotest => 'test:autotest'

  end  # if test

  # --------------------------------------------------------------------------
  # autotest loop over the specs, defined only when the rspec rake task is
  # available and the spec file list is non-empty
  if HAVE_SPEC_RAKE_SPECTASK && !PROJ.spec.files.to_a.empty?
    require 'autotest/rspec'

    namespace :spec do
      task :autotest do
        # pick up a local .autotest file when one is present
        load '.autotest' if test(?f, '.autotest')
        Autotest::Rspec.run
      end
    end

    desc "Run the autotest loop"
    task :autotest => 'spec:autotest'

  end  # if rspec

end  # if HAVE_ZENTEST
35 |
36 | # EOF
37 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_string_array.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub holding an ordered list of raw strings.
  class VSStubs::VSStringArray
    include VSStubHelpers

    attr_accessor :value

    # Starts out empty.
    def initialize
      @value = []
    end

    # Append obj to the list.
    def add(obj)
      @value << obj
    end

    # Header from the included serialize, the element count as a 7-bit
    # encoded int, then for each string its size (7-bit encoded) and the
    # string itself.
    def serialize
      buf = super
      buf << self.write_7bit_encoded_int(self.size)
      self.value.each do |str|
        buf << self.write_7bit_encoded_int(str.size)
        buf << str
      end
      buf
    end

    # Element from the included to_xml with a size attribute; every string
    # is rendered as a VSString child element.
    def to_xml
      node = super
      node.add_attribute("size",self.value.size.to_s)
      self.value.each do |str|
        node.add_element(VSStubs::VSString.new(str).to_xml)
      end
      node
    end

    # Not implemented yet.
    def from_xml
      # serialized with VSString (for convenience)
      # make sure not to deserialize the opcode when you write this out
    end
  end
end
40 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_stub_helpers.rb:
--------------------------------------------------------------------------------
module WWMD
  # Behaviour shared by the viewstate stub classes: deriving the type symbol
  # and opcode from the class name, and the common serialize/to_xml prefix.
  module VSStubHelpers
    include ViewStateUtils

    # Type symbol derived from the class name: the last "::" segment gets an
    # underscore in front of every run of capitals, is downcased, and loses
    # its leading "vs" (VSIndexedStringRef => :indexed_string_ref).
    def to_sym
      # The replacement used to start with \1, which refers to a group the
      # pattern does not have and therefore always expanded to nothing.
      self.class.to_s.split(":").last.gsub(/[A-Z]+/,'_\0').downcase[1..-1].gsub(/\Avs/,"").to_sym
    end

    # Opcode found by looking up this stub's type symbol in VIEWSTATE_TYPES.
    def opcode
      sym = self.to_sym
      # Hash#index is deprecated in favour of Hash#key and is not available
      # on current Rubies. Prefer key when the container offers it and keep
      # index for containers (such as an Array) that only have that.
      if VIEWSTATE_TYPES.respond_to?(:key)
        return VIEWSTATE_TYPES.key(sym)
      end
      return VIEWSTATE_TYPES.index(sym)
    end

    # Size of the held value.
    def size
      return @value.size
    end

    # Common serialization prefix: the opcode byte, followed by the
    # serialized type when the stub exposes typeref.
    def serialize
      stack = ""
      stack << self.write_byte(self.opcode)
      if self.respond_to?(:typeref)
        stack << self.serialize_type(self.typeref,self.typeval)
      end
      return stack
    end

    # New REXML element named after the class (last "::" segment), with
    # typeref/typeval attributes when the stub exposes typeref.
    def to_xml
      xml = REXML::Element.new(self.class.to_s.split(":").last)
      if self.respond_to?(:typeref)
        xml.add_attribute("typeref",self.typeref)
        xml.add_attribute("typeval",self.typeval)
      end
#      xml.add_attribute("size",self.size)
      xml
    end

  end
end
38 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs.rb:
--------------------------------------------------------------------------------
# Namespace for the stub classes loaded by the requires that follow.
module WWMD::VSStubs
end
2 | require 'wwmd/viewstate/vs_stubs/vs_stub_helpers'
3 | require 'wwmd/viewstate/vs_stubs/vs_read_value'
4 | require 'wwmd/viewstate/vs_stubs/vs_read_types'
5 | require 'wwmd/viewstate/vs_stubs/vs_value'
6 | require 'wwmd/viewstate/vs_stubs/vs_array'
7 | require 'wwmd/viewstate/vs_stubs/vs_binary_serialized'
8 | require 'wwmd/viewstate/vs_stubs/vs_int_enum'
9 | require 'wwmd/viewstate/vs_stubs/vs_hashtable'
10 | require 'wwmd/viewstate/vs_stubs/vs_hybrid_dict'
11 | require 'wwmd/viewstate/vs_stubs/vs_list'
12 | require 'wwmd/viewstate/vs_stubs/vs_pair'
13 | require 'wwmd/viewstate/vs_stubs/vs_sparse_array'
14 | require 'wwmd/viewstate/vs_stubs/vs_string'
15 | require 'wwmd/viewstate/vs_stubs/vs_string_array'
16 | require 'wwmd/viewstate/vs_stubs/vs_string_formatted'
17 | require 'wwmd/viewstate/vs_stubs/vs_triplet'
18 | require 'wwmd/viewstate/vs_stubs/vs_type'
19 | require 'wwmd/viewstate/vs_stubs/vs_unit'
20 | require 'wwmd/viewstate/vs_stubs/vs_indexed_string'
21 | require 'wwmd/viewstate/vs_stubs/vs_indexed_string_ref'
22 |
23 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_hashtable.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub holding an ordered list of key/value pairs.
  class VSStubs::VSHashtable
    include VSStubHelpers

    attr_accessor :value

    # Starts out without entries.
    def initialize
      @value = []
    end

    # Append a [key, value] pair.
    def add(obj1, obj2)
      @value << [obj1, obj2]
    end

    # Header from the included serialize, the pair count as a 7-bit encoded
    # int, then each key followed by its value.
    def serialize
      buf = super
      buf << self.write_7bit_encoded_int(self.size)
      self.value.each do |entry_key, entry_val|
        buf << entry_key.serialize
        buf << entry_val.serialize
      end
      buf
    end

    # Element from the included to_xml with a size attribute and one
    # <Pair><Key/><Value/></Pair> child per entry.
    def to_xml
      node = super
      node.add_attribute("size",self.value.size.to_s)
      self.value.each do |entry_key, entry_val|
        key_node = REXML::Element.new("Key")
        key_node.add_element(entry_key.to_xml)
        val_node = REXML::Element.new("Value")
        val_node.add_element(entry_val.to_xml)

        pair = REXML::Element.new("Pair")
        pair.add_element(key_node)
        pair.add_element(val_node)
        node.add_element(pair)
      end
      node
    end

  end
end
43 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_hybrid_dict.rb:
--------------------------------------------------------------------------------
module WWMD
  # Stub holding an ordered list of key/value pairs.
  class VSStubs::VSHybridDict
    include VSStubHelpers

    attr_accessor :value

    # Starts out without entries.
    def initialize
      @value = []
    end

    # Append a [key, value] pair.
    def add(obj1, obj2)
      @value << [obj1, obj2]
    end

    # Header from the included serialize, the pair count as a 7-bit encoded
    # int, then each key followed by its value.
    def serialize
      buf = super
      buf << self.write_7bit_encoded_int(self.size)
      self.value.each do |entry_key, entry_val|
        buf << entry_key.serialize
        buf << entry_val.serialize
      end
      buf
    end

    # Element from the included to_xml with a size attribute and one
    # <Pair><Key/><Value/></Pair> child per entry.
    def to_xml
      node = super
      node.add_attribute("size",self.value.size.to_s)
      self.value.each do |entry_key, entry_val|
        key_node = REXML::Element.new("Key")
        key_node.add_element(entry_key.to_xml)
        val_node = REXML::Element.new("Value")
        val_node.add_element(entry_val.to_xml)

        pair = REXML::Element.new("Pair")
        pair.add_element(key_node)
        pair.add_element(val_node)
        node.add_element(pair)
      end
      node
    end

  end
end
43 |
--------------------------------------------------------------------------------
/spec/spider_csrf_test.spec:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | require 'wwmd'
3 | include WWMD
4 | require 'spec'
5 |
describe Page do
  # every example gets a fresh page whose spider uses "CsRf" as csrf token
  before(:each) do
    @page = Page.new({:base_url => "http://localhost"})
    @spider = @page.spider
    @spider.csrf_token = "CsRf"
  end

  it "should remove csrf tokens from visited and queued" do
    visited_url = "http://localhost/foo.php?CsRf=something&bar=baz"
    found_links = ["http://localhost/q1.php?CsRf=omg&first=FIRST"]
    @spider.add(visited_url, found_links)
    # the token value is blanked, the parameter itself stays
    @spider.visited.first.should == "http://localhost/foo.php?CsRf=&bar=baz"
    @spider.queued.first.should == "http://localhost/q1.php?CsRf=&first=FIRST"
  end

  it "should work normally" do
    visited_url = "http://localhost/foo.php?hithere=something&bar=baz"
    found_links = ["http://localhost/q1.php?hithere=omg&first=FIRST"]
    @spider.add(visited_url, found_links)
    # urls without the token come back unchanged
    @spider.visited.first.should == "http://localhost/foo.php?hithere=something&bar=baz"
    @spider.queued.first.should == "http://localhost/q1.php?hithere=omg&first=FIRST"
  end
end
29 |
--------------------------------------------------------------------------------
/tasks/test.rake:
--------------------------------------------------------------------------------
1 |
# Tasks are defined only when the configured test file exists or the test
# file list is non-empty.
if test(?e, PROJ.test.file) || !PROJ.test.files.to_a.empty?
  require 'rake/testtask'

  namespace :test do

    Rake::TestTask.new(:run) do |t|
      t.libs = PROJ.libs
      # a single configured test file wins over the file list
      t.test_files = test(?f, PROJ.test.file) ? [PROJ.test.file] : PROJ.test.files
      t.ruby_opts += PROJ.ruby_opts
      t.ruby_opts += PROJ.test.opts
    end

    if HAVE_RCOV
      desc 'Run rcov on the unit tests'
      task :rcov => :clobber_rcov do
        opts = (PROJ.rcov.opts.dup << '-o' << PROJ.rcov.dir).join(' ')
        files = test(?f, PROJ.test.file) ? [PROJ.test.file] : PROJ.test.files
        files = files.join(' ')
        sh "#{RCOV} #{files} #{opts}"
      end

      task :clobber_rcov do
        # removing the coverage directory is best effort
        begin
          rm_r 'coverage'
        rescue StandardError
          nil
        end
      end
    end

  end  # namespace :test

  desc 'Alias to test:run'
  task :test => 'test:run'

  task :clobber => 'test:clobber_rcov' if HAVE_RCOV

end
39 |
40 | # EOF
41 |
--------------------------------------------------------------------------------
/tasks/git.rake:
--------------------------------------------------------------------------------
1 |
if HAVE_GIT

  namespace :git do

    # A prerequisites task that all other tasks depend upon
    task :prereqs

    desc 'Show tags from the Git repository'
    task :show_tags => 'git:prereqs' do |t|
      puts %x/git tag/
    end

    desc 'Create a new tag in the Git repository'
    task :create_tag => 'git:prereqs' do |t|
      # VERSION must be supplied and must equal the project version
      v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
      abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version

      tag = "%s-%s" % [PROJ.name, PROJ.version]
      msg = "Creating tag for #{PROJ.name} version #{PROJ.version}"

      puts "Creating Git tag '#{tag}'"
      # Arguments are passed as a list so git receives the message and tag
      # exactly as built, without a shell re-parsing quotes or spaces.
      unless system('git', 'tag', '-a', '-m', msg, tag)
        abort "Tag creation failed"
      end

      # push the tag only when a remote named origin is configured
      if %x/git remote/ =~ %r/^origin\s*$/
        unless system('git', 'push', 'origin', tag)
          abort "Could not push tag to remote Git repository"
        end
      end
    end

  end  # namespace :git

  task 'gem:release' => 'git:create_tag'

end  # if HAVE_GIT
39 |
40 | # EOF
41 |
--------------------------------------------------------------------------------
/lib/wwmd/page/helpers.rb:
--------------------------------------------------------------------------------
module WWMD
  class Page
    # copy and paste from burp request windows
    # page object gets set with headers and url (not correct)
    # returns [headers,form]
    #   headers,form = page.from_paste

    # Load a raw HTTP request (header block, blank line, body) into this page.
    #
    # Cookies are switched off first. The header block is split into lines,
    # the request line supplies the method and target, and the remaining
    # lines go to headers_from_array. The body becomes body_data and is
    # converted to a form whose action is the target resolved against
    # base_url.
    #
    # req - the raw request text, using "\r\n" line endings.
    #
    # Returns false when req is nil/false or empty, nil when there is no
    # header block or the method does not start with POST or GET, and
    # otherwise [header_block_string, form].
    def from_input(req)
      self.enable_cookies = false
      return false if !req || req.empty?
      head, body = req.chomp.split("\r\n\r\n",2)
      # nothing but a line ending was passed in
      return nil if head.nil?
      lines = head.split("\r\n")
      # to_s guards against a request that starts with the blank line
      verb, target = lines.shift.to_s.split(" ")
      return nil unless verb =~ (/^(POST|GET)/)
      self.url = self.base_url + target
      self.headers_from_array(lines)
      self.body_data = body
      self.set_data
      form = body.to_form
      form.action = @urlparse.parse(self.base_url, target).to_s
      [head,form]
    end

    # Run from_input on the contents of file fn.
    #
    # The page's headers are copied beforehand and put back afterwards,
    # including when from_input raises.
    #
    # Returns whatever from_input returned.
    def from_file(fn)
      saved = headers.clone
      begin
        from_input(File.read(fn))
      ensure
        headers.replace(saved)
      end
    end

    # Run from_input on the output of the pbpaste command.
    def from_paste
      from_input(%x[pbpaste])
    end

    # Use the body part (everything after the first blank line) of the
    # pbpaste output as this page's body_data, then call set_data.
    def resp_paste
      self.body_data = %x[pbpaste].split("\r\n\r\n",2)[1]
      self.set_data
    end
  end
end
42 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/viewstate_yaml.rb:
--------------------------------------------------------------------------------
class String
  # right now I have no idea why "\x0d\x0a" is getting munged in yaml transforms
  # something weird helped find by timur@.  double up "\r" before "\n" works
  # this might be mac specific and break on other platforms.  I don't care.
  # patch not for general use do not try this at home.
  #
  # Emission order: binary data is written base64 under the yaml binary
  # tag; a string containing "\r\n" is written double-quoted with every
  # "\r\n" turned into "\r\r\n"; a string without yaml properties is written
  # as a plain scalar (double-quoted when a line starts with ":"); anything
  # else becomes a map of 'str' plus its yaml properties.
  def to_yaml( opts = {} )
    YAML::quick_emit( is_complex_yaml? ? object_id : nil, opts ) do |emitter|
      case
      when is_binary_data?
        emitter.scalar( "tag:yaml.org,2002:binary", [self].pack("m"), :literal )
      when ( self =~ /\r\n/ )
#        emitter.scalar( "tag:yaml.org,2002:binary", [self].pack("m"), :literal )
        emitter.scalar( taguri, self.gsub(/\r\n/,"\r\r\n"), :quote2 )
      when to_yaml_properties.empty?
        emitter.scalar( taguri, self, self =~ /^:/ ? :quote2 : to_yaml_style )
      else
        emitter.map( taguri, to_yaml_style ) do |mapping|
          mapping.add( 'str', "#{self}" )
          to_yaml_properties.each do |ivar|
            mapping.add( ivar, instance_variable_get( ivar ) )
          end
        end
      end
    end
  end
end
26 |
--------------------------------------------------------------------------------
/lib/wwmd/class_extensions/mixins_string_encoding.rb:
--------------------------------------------------------------------------------
1 | =begin rdoc
2 | Place character encoding methods here
3 | =end
4 |
module WWMD
  # Character encodings live here as module methods so that they can be
  # mixed into the String class.
  module Encoding

    # String.to_utf7 mixin
    # (complete hack but it works)
    #
    # Every byte of the string that matches the selected regular
    # expression is replaced by "+" + three base64 characters + "-";
    # all other bytes are copied through untouched.
    #
    # all:: true    => use ESCAPE[:all]
    #       Regexp  => use the passed regular expression
    #       other   => use ESCAPE[:nalnum] (fallback /[^a-zA-Z0-9]/)
    #
    # used by:
    #   String.to_utf7
    #   String.to_utf7!
    def to_utf7(all=nil)
      reg = case all
            when Regexp    then all
            when TrueClass then ESCAPE[:all]
            else ESCAPE[:nalnum] || /[^a-zA-Z0-9]/
            end
      putd "DEBG:" + reg.inspect
      encoded = ''
      each_byte do |byte|
        char = byte.chr
        if char.match(reg)
          # first three base64 characters of the UTF-16 form of the byte
          encoded << "+" << Base64.encode64(char.toutf16)[0..2] << "-"
        else
          encoded << char
        end
      end
      encoded
    end
  end
end
41 |
--------------------------------------------------------------------------------
/tasks/post_load.rake:
--------------------------------------------------------------------------------
1 |
2 | # This file does not define any rake tasks. It is used to load some project
3 | # settings if they are not defined by the user.
4 |
# Exclude the announcement file, the ignore file and the rdoc / rcov
# output directories.  The array is appended as ONE nested element; it is
# flattened by flatten_arrays below.
PROJ.exclude << ["^#{Regexp.escape(PROJ.ann.file)}$",
                 "^#{Regexp.escape(PROJ.ignore_file)}$",
                 "^#{Regexp.escape(PROJ.rdoc.dir)}/",
                 "^#{Regexp.escape(PROJ.rcov.dir)}/"]

# Walk the settings table of an OpenStruct and flatten every Array value in
# place, descending into nested OpenStructs.  The lambda receives itself as
# +this+ so that it can recurse.  The dependency lists are left untouched.
flatten_arrays = lambda do |this,os|
  os.instance_variable_get(:@table).each do |key,val|
    next if key == :dependencies \
         or key == :development_dependencies
    case val
    when Array; val.flatten!
    when OpenStruct; this.call(this,val)
    end
  end
end
flatten_arrays.call(flatten_arrays,PROJ)

# Fill in project settings the user did not define.  Order matters:
# summary is derived from description, executables from gem.files.
PROJ.changes ||= paragraphs_of(PROJ.history_file, 0..1).join("\n\n")

PROJ.description ||= paragraphs_of(PROJ.readme_file, 'description').join("\n\n")

# text of the description up to its first "."
PROJ.summary ||= PROJ.description.split('.').first

PROJ.gem.files ||= manifest

# every packaged file whose path starts with "bin"
PROJ.gem.executables ||= PROJ.gem.files.find_all {|fn| fn =~ %r/^bin/}

PROJ.rdoc.main ||= PROJ.readme_file
33 |
34 | # EOF
35 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_read_value.rb:
--------------------------------------------------------------------------------
module WWMD
  # Viewstate stub holding one simple value; the writer used to serialize
  # it is picked from the stub's to_sym.
  class VSStubs::VSReadValue
    include VSStubHelpers

    attr_accessor :value

    def initialize(val)
      @value = val
    end

    # Append the encoded value to whatever the inherited serialize
    # returned and return the result.
    # Raises RuntimeError ("unimplemented ...") for any other to_sym.
    def serialize
      stack = super
      encoded =
        case to_sym
        when :int16               then write_short(value)
        when :int32, :known_color then write_7bit_encoded_int(value)
        when :byte, :char         then write_byte(value)
        when :date_time, :double  then write_double(value)
        when :single              then write_single(value)
        when :color               then write_int32(value)
        else raise "unimplemented #{to_sym}"
        end
      stack << encoded
      stack
    end

    # Inherited xml element with the value added as text.
    def to_xml
      node = super
      node.add_text(value.to_s)
      node
    end

  end
end
36 |
--------------------------------------------------------------------------------
/lib/wwmd/page/inputs.rb:
--------------------------------------------------------------------------------
module WWMD
  # Collects the <input> and <select> elements of a parsed page and turns
  # them into form data.
  class Inputs
    # list of elements found by the last call to #set
    attr_accessor :elems

    # args.first:: the object that is searched for elements and asked for
    #              the current url (the "wwmd object")
    def initialize(*args)
      @cobj  = args.shift
      @elems = []            # nothing collected until #set is called
    end

    # print the collected elements (via putx)
    def show
      putx @elems
    end

    # call me from Page.set_data
    #
    # Collect all input elements followed by all select elements.
    # to_a is used on purpose: map without a block hands back an
    # Enumerator on ruby >= 1.8.7, which flatten does not expand.
    def set
      @elems = @cobj.search("//input").to_a + @cobj.search("//select").to_a
    end

    # return the value of attribute +attr+ for every collected element
    # that has it (nil values are dropped)
    def get(attr=nil)
      @elems.map { |x| x[attr] }.compact
    end

    #
    # return: FormArray containing all page inputs
    #
    # Each element contributes its value under its name and, when the id
    # differs from the name, under its id as well.  Elements with neither
    # name nor id are skipped; a missing name no longer creates a nil key.
    def form
      ret = {}
      @elems.each do |x|
        name = x['name']
        id   = x['id']
        next if name.nil? && id.nil?
        value = x['value']
        ret[name] = value unless name.nil?
        ret[id]   = value unless id.nil? || id == name
      end
      FormArray.new(ret)
    end

    #
    # return: FormArray containing get params
    def params
      FormArray.new(@cobj.cur.clop.to_form)
    end
  end
end
48 |
--------------------------------------------------------------------------------
/spec/form_array.spec:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | require 'wwmd'
3 | include WWMD
4 | require 'spec'
5 |
# Behaviour of FormArray: element access, parsing from a query string,
# rendering as GET parameters, null removal and cloning.
describe FormArray do
  # every example starts with a fresh, empty form
  before(:each) do
    @form = FormArray.new
  end

  it "sets a value and reads a value" do
    @form["foo"] = "bar"
    @form["foo"].should == "bar"
  end

  # String#to_form splits "k=v&k=v" into form entries
  it "reads from a string" do
    @form = "foo=bar&baz=eep&argle=bargle".to_form
    @form["foo"].should == "bar"
    @form["baz"].should == "eep"
    @form["argle"].should == "bargle"
  end

  # to_get gives back the original string prefixed with "?"
  it "to_get" do
    str = "foo=bar&baz=eep&argle=bargle"
    get = "?" + str
    @form = str.to_form
    @form.to_get.should == get
  end

  # both the empty string and nil count as null values
  it "remove_nulls!" do
    @form["var1"] = "not null"
    @form["var2"] = ""
    @form["var3"] = nil
    @form.remove_nulls!
    @form.size.should == 1
    @form["var1"].should == "not null"
  end

  # changing the clone must leave the original untouched
  it "clones correctly" do
    @form = "foo=bar&baz=eep&argle=bargle".to_form
    lform = @form.clone
    lform["foo"] = "test"
    @form["foo"].should == "bar"
    lform["foo"].should == "test"
  end

  # pending examples (no body yet)
  it "escapes characters correctly"
  it "unescapes characters correctly"
end
50 |
--------------------------------------------------------------------------------
/History.txt:
--------------------------------------------------------------------------------
1 | == 0.2.20 / 2009-08-24
2 |
3 | * convert ViewState to use StringIO
4 | * include iZsh changes to fix some issues
5 | * still lots to do and there's no time
6 |
7 | == 0.2.19 /
8 |
9 | * nothing to see here... move along quietly
10 |
11 | == 0.2.18 /
12 |
13 | * black hat special
14 |
15 | == 0.2.17 / 2009-06-22
16 |
17 | * lots happening between here and .9
18 | * viewstate refactor complete
19 | * clean up page/page.rb
20 | * cleaning up page/headers.rb
21 | * cleaning up page/scrape.rb
22 | * FormArray refactor includes the form action (full URL)
23 | * page.submit(page.get_form)
24 | * still bugs in URLParse but hunting them down throw by throw
25 | * remove broken NTLM (preserve auth header warnings)
26 | * remove WWMDConfig in favor of WWMD module methods but preserve old class for back compat
27 | * add some burp helpers
28 | * Page#from_paste (take entire request into Page and turn off cookies)
29 | * burp log parsing coming
30 | * Curb includes http_put (with header munging bug so careful)
31 | * internal monkey patch for Curb to do arbitrary verb tampering (not here yet)
32 | * add String#pbcopy
33 | * move lots of things around for clarity during refactor
34 | * refactor progressing but still unstable (0.2.16 gem including viewstate is good to go)
35 |
36 | == 0.2.9 / 2009-05-05
37 |
38 | * bonesify
39 |
--------------------------------------------------------------------------------
/tasks/rdoc.rake:
--------------------------------------------------------------------------------
1 |
2 | require 'rake/rdoctask'
3 |
# Documentation tasks: doc:rdoc (plus the clobber/rerdoc tasks RDocTask
# defines), doc:ri and doc:clobber_ri.
namespace :doc do

  desc 'Generate RDoc documentation'
  Rake::RDocTask.new do |rd|
    settings    = PROJ.rdoc
    rd.main     = settings.main
    rd.rdoc_dir = settings.dir

    # a file is documented when it matches one of the include patterns
    # and none of the exclude patterns
    wanted   = Regexp.new(settings.include.join('|'))
    unwanted = Regexp.new(settings.exclude.join('|'))
    documented = PROJ.gem.files.select do |fn|
      !(unwanted === fn) && (wanted === fn)
    end
    rd.rdoc_files.push(*documented)

    project    = PROJ.name
    forge_name = PROJ.rubyforge.name

    # prefix the title with the rubyforge name when it is set and differs
    # from the project name
    title = "#{project}-#{PROJ.version} Documentation"
    if forge_name.valid? && forge_name != project
      title = "#{forge_name}'s " + title
    end

    rd.options << "-t #{title}"
    rd.options.concat(settings.opts)
  end

  desc 'Generate ri locally for testing'
  task :ri => :clobber_ri do
    sh "#{RDOC} --ri -o ri ."
  end

  # remove the local ri directory; a failing removal is ignored
  task :clobber_ri do
    begin
      rm_r 'ri'
    rescue StandardError
      nil
    end
  end

end  # namespace :doc

desc 'Alias to doc:rdoc'
task :doc => 'doc:rdoc'

desc 'Remove all build products'
task :clobber => %w(doc:clobber_rdoc doc:clobber_ri)

remove_desc_for_task %w(doc:clobber_rdoc)
50 |
51 | # EOF
52 |
--------------------------------------------------------------------------------
/lib/wwmd/page/html2text_nokogiri.rb:
--------------------------------------------------------------------------------
1 | =begin rdoc
2 | html2text that works with Nokogiri
3 | =end
module WWMD

  # tag names that add no formatting to their text
  INLINETAGS = ['a','abbr','acronym','address','b','bdo','big','cite',
                'code','del','dfn','em','font','i','ins','kbd','label',
                'noframes','noscript','q','s','samp','small','span',
                'strike','strong','sub','sup','td','th','tt','u',
                'html','body','table']
  # tag names whose text is followed by a newline
  BLOCKTAGS = ['blockquote','center','dd','div','fieldset','form',
               'h1','h2','h3','h4','h5','h6','p','pre','tr','var',]
  # list containers; treated like block tags
  LISTTAGS = ['dir','dl','menu','ol','ul']
  # list items; rendered as "* text"
  ITEMTAGS = ['li','dt']
  # empty elements that produce output themselves
  SPECIALTAGS = ['br','hr']

  class Page
    # Render the parsed document (self.scrape.hdoc) as plain text.
    #
    # Text nodes are stripped and formatted according to the name of
    # their parent tag, <br> becomes a newline and <hr> a rule of 72
    # dashes.  Everything outside printable ASCII and newline is removed
    # and blank lines are dropped.
    #
    # returns the text as a String
    def html2text
      # pass 1: [tag name, text] pairs in document order
      pairs = []
      self.scrape.hdoc.traverse do |x|
        pairs << [x.parent.name,x.text] if x.text?
        pairs << [x.name,""] if x.elem? && SPECIALTAGS.include?(x.name)
      end

      # pass 2: format every pair, then join once instead of growing a
      # string with +=
      text = pairs.map do |name,str|
        if name == "br"
          "\n"
        elsif name == "hr"
          "\n" + ("-" * 72) + "\n"
        else
          s = str.strip
          if BLOCKTAGS.include?(name) or LISTTAGS.include?(name)
            s + "\n"
          elsif ITEMTAGS.include?(name)
            "* " + s + "\n"
          else
            s
          end
        end
      end.join

      # Removing empty lines also collapses runs of newlines, so the
      # separate gsub(/\n+/) whose result used to be thrown away is gone.
      text.gsub(/[^\x20-\x7e,\n]/,"").gsub(/^\n/,"")
    end
  end
end
43 |
--------------------------------------------------------------------------------
/tasks/spec.rake:
--------------------------------------------------------------------------------
1 |
# The spec tasks are only defined when the rspec rake task is available
# and the project lists at least one spec file.
if HAVE_SPEC_RAKE_SPECTASK and not PROJ.spec.files.to_a.empty?
require 'spec/rake/verify_rcov'

namespace :spec do

  desc 'Run all specs with basic output'
  Spec::Rake::SpecTask.new(:run) do |t|
    t.ruby_opts = PROJ.ruby_opts
    t.spec_opts = PROJ.spec.opts
    t.spec_files = PROJ.spec.files
    t.libs += PROJ.libs
  end

  # same as spec:run with the specdoc formatter added to the options
  desc 'Run all specs with text output'
  Spec::Rake::SpecTask.new(:specdoc) do |t|
    t.ruby_opts = PROJ.ruby_opts
    t.spec_opts = PROJ.spec.opts + ['--format', 'specdoc']
    t.spec_files = PROJ.spec.files
    t.libs += PROJ.libs
  end

  # coverage tasks exist only when rcov is available
  if HAVE_RCOV
    desc 'Run all specs with RCov'
    Spec::Rake::SpecTask.new(:rcov) do |t|
      t.ruby_opts = PROJ.ruby_opts
      t.spec_opts = PROJ.spec.opts
      t.spec_files = PROJ.spec.files
      t.libs += PROJ.libs
      t.rcov = true
      t.rcov_dir = PROJ.rcov.dir
      # the spec files themselves are kept out of the coverage report
      t.rcov_opts = PROJ.rcov.opts + ['--exclude', 'spec']
    end

    # spec:verify checks the coverage in the rcov index.html against the
    # configured threshold
    RCov::VerifyTask.new(:verify) do |t|
      t.threshold = PROJ.rcov.threshold
      t.index_html = File.join(PROJ.rcov.dir, 'index.html')
      t.require_exact_threshold = PROJ.rcov.threshold_exact
    end

    # verification needs a fresh coverage run first
    task :verify => :rcov
    remove_desc_for_task %w(spec:clobber_rcov)
  end

end  # namespace :spec

desc 'Alias to spec:run'
task :spec => 'spec:run'

task :clobber => 'spec:clobber_rcov' if HAVE_RCOV

end  # if HAVE_SPEC_RAKE_SPECTASK
53 |
54 | # EOF
55 |
--------------------------------------------------------------------------------
/lib/wwmd/viewstate/vs_stubs/vs_sparse_array.rb:
--------------------------------------------------------------------------------
module WWMD
  # Viewstate stub for a sparse array: only the indexes that were set
  # with #add are serialized.
  class VSStubs::VSSparseArray
    include VSStubHelpers

    attr_accessor :value
    attr_reader :typeref, :typeval, :size, :elems

    def initialize(typeref,typeval,size,elems)
      @typeref, @typeval = typeref, typeval
      @size,    @elems   = size, elems
      @value = []
    end

    # store +obj+ at index +idx+
    def add(idx,obj)
      @value[idx] = obj
    end

    # Inherited serialization followed by size and elems as 7bit encoded
    # ints, then index (7bit encoded int) and serialized object for every
    # slot that is not nil.
    def serialize
      stack = super
      [size, elems].each { |n| stack << write_7bit_encoded_int(n) }
      value.each_with_index do |obj, i|
        next if obj.nil?
        stack << write_7bit_encoded_int(i)
        stack << obj.serialize
      end
      stack
    end

    # Inherited xml element with <Size>, <Elements> and one
    # <Pair><Index/><Value/></Pair> per slot that is not nil.
    def to_xml
      xml = super
      [["Size", size], ["Elements", elems]].each do |tag, number|
        node = REXML::Element.new(tag)
        node.add_text(number.to_s)
        xml.add_element(node)
      end
      value.each_with_index do |obj, i|
        next if obj.nil?
        index_node = REXML::Element.new("Index")
        index_node.add_text(i.to_s)
        value_node = REXML::Element.new("Value")
        value_node.add_element(obj.to_xml)
        pair = REXML::Element.new("Pair")
        pair.add_element(index_node)
        pair.add_element(value_node)
        xml.add_element(pair)
      end
      xml
    end

  end
end
59 |
--------------------------------------------------------------------------------
/lib/wwmd/class_extensions/extensions_encoding.rb:
--------------------------------------------------------------------------------
1 | require 'wwmd/class_extensions/mixins_string_encoding'
# Encoding and escaping shortcuts for String.
class String
  include WWMD::Encoding

  # shared html entity coder
  @@he = HTMLEntities.new

  # base 64 decode
  def b64d
    unpack("m").first
  end

  # base 64 encode (on a single line)
  def b64e
    [self].pack("m").gsub("\n","")
  end

  # URI.escape using defaults or passed regexp
  #
  # reg:: nil or :default => URI.escape defaults
  #       :none           => return self untouched
  #       other Symbol    => look the expression up in WWMD::ESCAPE
  #       anything else   => passed straight to URI.escape
  # unicodify:: when true every "%" in the result becomes "%u00"
  def escape(reg=nil,unicodify=false)
    return self if reg.equal?(:none)
    escaped =
      case reg
      when nil, :default then URI.escape(self)
      when Symbol        then URI.escape(self,WWMD::ESCAPE[reg])
      else                    URI.escape(self,reg)
      end
    escaped.gsub!(/%/,"%u00") if unicodify
    escaped
  end

  # escape with the :url expression
  def escape_url(reg=WWMD::ESCAPE[:url])#:nodoc:
    escape(reg)
  end

  # escape with the :xss expression
  def escape_xss(reg=WWMD::ESCAPE[:xss])#:nodoc:
    escape(reg)
  end

  # escape with the :default setting
  def escape_default(reg=WWMD::ESCAPE[:default])
    escape(reg)
  end

  # URI.escape all characters in string
  def escape_all#:nodoc:
    escape(/.*/)
  end

  # URI.unescape
  def unescape
    URI.unescape(self)
  end

  # html entity encode string
  # sym = :basic :named :decimal :hexadecimal (:named when omitted)
  def eencode(sym=nil)
    @@he.encode(self, sym.nil? ? :named : sym)
  end

  # decode html entities in string
  def edecode
    @@he.decode(self)
  end

  # to quoted printable
  def to_qp
    [self].pack("M")
  end

  # from quoted printable
  def from_qp
    unpack("M").first
  end

end
80 |
--------------------------------------------------------------------------------
/tasks/rubyforge.rake:
--------------------------------------------------------------------------------
1 |
# The release tasks are only defined when a rubyforge project name is
# configured and the rubyforge library is available.
if PROJ.rubyforge.name.valid? && HAVE_RUBYFORGE

require 'rubyforge'
require 'rake/contrib/sshpublisher'

namespace :gem do
  desc 'Package and upload to RubyForge'
  task :release => [:clobber, 'gem'] do |t|
    # VERSION must be given on the command line and match the project
    # version; the task aborts otherwise
    v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
    abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version
    pkg = "pkg/#{PROJ.gem._spec.full_name}"

    # with $DEBUG set, print what is about to be released
    if $DEBUG then
      puts "release_id = rf.add_release #{PROJ.rubyforge.name.inspect}, #{PROJ.name.inspect}, #{PROJ.version.inspect}, \"#{pkg}.tgz\""
      puts "rf.add_file #{PROJ.rubyforge.name.inspect}, #{PROJ.name.inspect}, release_id, \"#{pkg}.gem\""
    end

    rf = RubyForge.new
    rf.configure rescue nil    # errors raised by configure are ignored
    puts 'Logging in'
    rf.login

    # release notes and change log come from the project settings
    c = rf.userconfig
    c['release_notes'] = PROJ.description if PROJ.description
    c['release_changes'] = PROJ.changes if PROJ.changes
    c['preformatted'] = true

    # every packaged artifact of this version (pkg/<full_name>*.*)
    files = Dir.glob("#{pkg}*.*")

    puts "Releasing #{PROJ.name} v. #{PROJ.version}"
    rf.add_release PROJ.rubyforge.name, PROJ.name, PROJ.version, *files
  end
end  # namespace :gem


namespace :doc do
  desc "Publish RDoc to RubyForge"
  task :release => %w(doc:clobber_rdoc doc:rdoc) do
    # the rubyforge user name is read from the user's own config file
    config = YAML.load(
        File.read(File.expand_path('~/.rubyforge/user-config.yml'))
    )

    host = "#{config['username']}@rubyforge.org"
    remote_dir = "/var/www/gforge-projects/#{PROJ.rubyforge.name}/"
    # optional sub directory below the project directory
    remote_dir << PROJ.rdoc.remote_dir if PROJ.rdoc.remote_dir
    local_dir = PROJ.rdoc.dir

    Rake::SshDirPublisher.new(host, remote_dir, local_dir).upload
  end
end  # namespace :doc

end  # if HAVE_RUBYFORGE
54 |
55 | # EOF
56 |
--------------------------------------------------------------------------------
/lib/wwmd/wwmd_config.rb:
--------------------------------------------------------------------------------
module WWMD

  class WWMDConfig#:nodoc:
    # for backward compat: both calls are handed on to the WWMD module
    class << self
      def load_config(file); WWMD::load_config(file); end
      def parse_opts(args); WWMD::parse_opts(args); end
    end
  end

  # Load the YAML configuration in +file+ and return the parsed data.
  # On any error two warnings are printed with putw and the program exits.
  def load_config(file)
    YAML.load_file(file)
  rescue => e
    putw "config file not found #{file}"
    putw e.inspect
    exit
  end

  # Parse command line options out of +args+ (recognized options are
  # removed from +args+) and return them as a Hash with symbol keys.
  # :max_redirects, :timeout and :scrape_warn are always present.
  # --use_proxy also sets ENV['HTTP_PROXY']; -h/--help prints the usage
  # and exits.
  def parse_opts(args)
    # set defaults
    inopts = { :max_redirects => 10, :timeout => 30, :scrape_warn => false }
    parser = OptionParser.new do |o|
      o.on("-p", "--password PASSWORD", "Password") { |v| inopts[:password] = v }
      o.on("-u", "--username USERNAME", "Username") { |v| inopts[:username] = v }
      o.on("--header_file HEADER_FILE","Header file") { |v| inopts[:header_file] = v }
      o.on("--base_url BASE_URL","Base url") { |v| inopts[:base_url] = v }
      o.on("--use_proxy PROXY_URL", "Use proxy at url") do |v|
        ENV['HTTP_PROXY'] = "http://" + v.to_s
        inopts[:use_proxy] = true
        inopts[:proxy_url] = v
      end
      o.on("--no_proxy","do not use proxy") do |v|
        inopts[:use_proxy] = false
        inopts[:proxy_url] = nil
      end
      o.on("--use_auth","login before getting url") { |v| inopts[:use_auth] = true }
      o.on("--no_auth","no login before getting url") { |v| inopts[:use_auth] = false }
      o.on("--debug","debugging really doesn't work") { |v| inopts[:debug] = true }
      o.on_tail("-h", "--help", "Show this message") do
        puts o
        exit
      end
    end
    parser.parse!(args)
    inopts
  end

end
53 |
--------------------------------------------------------------------------------
/lib/wwmd/page/constants.rb:
--------------------------------------------------------------------------------
module WWMD
  # string of characters that matter to markup and script quoting
  # NOTE(review): presumably a probe string for xss testing - confirm
  XSSFISH = "<;'\"}()[]>{"

  # default settings
  DEFAULTS = {
    :base_url        => "",
    :use_auth        => true,
    :enable_cookies  => true,
    :cookiejar       => "./__cookiejar",
    :follow_location => true,
    :max_redirects   => 20,
    :use_proxy       => false,
    :debug           => false,
    :scrape_warn     => true,
    :parse           => true,
    :timeout         => 20,
  }

  # named character classes for the escape methods; a match marks a
  # character that gets escaped.  :none and :default are not expressions
  # but markers that stand for themselves.
  ESCAPE = {
    :url     => /[^a-zA-Z0-9\-_%]/,
    :nalnum  => /[^a-zA-Z0-9]/,
    :xss     => /[^a-zA-Z0-9=?()']/,
    :ltgt    => /[<>]/,
    :all     => /.*/,
#    :b64     => /[=+\/]/,
    :b64     => /[^a-zA-Z0-9]/,
    :none    => :none,
    :default => :default,
  }

  # User-Agent strings by short name
  UA = {
    :mozilla => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.16) Gecko/20080702 Firefox/2.0.0.16",
    :moz3    => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.1) Gecko/2008070206 Firefox/3.0.1",
    :ie6     => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
    :ie7     => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    :ie8     => "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)",
    :opera   => "Opera/9.20 (Windows NT 6.0; U; en)",
    :safari  => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.22",
    :safari4 => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_7; en-us) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Safari/530.17",
    :wwmd    => "Mozilla/5.0 (compatible; WWMD #{WWMD::VERSION}; o_hai)"
  }

  # default request headers
  DEFAULT_HEADERS = {
    "User-Agent"      => UA[:wwmd],
    "Accept"          => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language" => "en-US,en;q=0.8,en-au;q=0.6,en-us;q=0.4,en;q=0.2",
    "Accept-Encoding" => "gzip,deflate",
    # charset name is ISO-8859-1 (was misspelled "SO-8859-1")
    "Accept-Charset"  => "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
    "Keep-Alive"      => "300",
    "Connection"      => "keep-alive",
  }

  # extra header sets by name
  HEADERS = {
    :default => nil,
    :utf7 => {
      "Content-Type"              => "application/x-www-form-urlencoded;charset=UTF-7",
      "Content-Transfer-Encoding" => "7bit",
    },
    :ajax => {
      "X-Requested-With"    => "XMLHttpRequest",
      "X-Prototype-Version" => "1.5.0",
    },
  }
end
64 |
--------------------------------------------------------------------------------
/lib/wwmd.rb:
--------------------------------------------------------------------------------
1 | # third-party
2 | require 'rubygems'
3 | unless self.respond_to?(:java)
4 | require 'ruby-debug'
5 | require 'curb'
6 | else
7 | # I_KNOW_I_AM_USING_AN_OLD_AND_BUGGY_VERSION_OF_LIBXML2 = true
8 | # require 'curb_ffi'
9 | # include CurbFfi
10 | end
11 | require 'yaml'
12 | require 'fileutils'
13 | require 'base64'
14 | require 'optparse'
15 | require 'digest'
16 | require 'uri'
17 | require 'htmlentities'
18 | require 'nkf'
19 | require 'rexml/document'
20 |
module WWMD

  # :stopdoc:
  VERSION = "0.2.20.1"
  PARSER  = :nokogiri   # :nokogiri || :hpricot
  LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
  PATH    = ::File.dirname(LIBPATH) + ::File::SEPARATOR
  # :startdoc:

  class << self

    # Returns the version string for the library.
    #
    def version
      VERSION
    end

    # Returns the library path for the module. Any arguments given are
    # joined to the end of the library path using File.join.
    #
    def libpath( *args )
      return LIBPATH if args.empty?
      ::File.join(LIBPATH, args.flatten)
    end

    # Returns the path for the module (the directory above the library
    # path). Any arguments given are joined to the end of the path using
    # File.join.
    #
    def path( *args )
      return PATH if args.empty?
      ::File.join(PATH, args.flatten)
    end

    # Utility method used to require all files ending in .rb that lie in
    # the directory below _fname_ that has the same name as _fname_
    # without its extension. A specific _dir_ name can be passed when the
    # directory is named differently. Files are required in sorted order;
    # any path matching /html2text_/ is left out.
    #
    def require_all_libs_relative_to( fname, dir = nil )
      dir ||= ::File.basename(fname, '.*')
      base    = ::File.dirname(fname)
      pattern = ::File.expand_path(::File.join(base, dir, '**', '*.rb'))

      Dir.glob(pattern).sort.each do |rb|
        require rb unless rb =~ /html2text_/
      end
    end

  end

end  # module WWMD
69 |
# Load every library file in the directory named after this file; the
# html2text_* files are left out by require_all_libs_relative_to and
# loaded below, once the parser is known.
WWMD.require_all_libs_relative_to(__FILE__)

# special case parser
#
# WWMD::PARSER decides which html parser library is loaded, which class
# WWMD::HDOC points to and which html2text implementation is used.

if WWMD::PARSER == :nokogiri
  require 'nokogiri'
  WWMD::HDOC = Nokogiri::HTML
  require 'wwmd/page/html2text_nokogiri'
else
  require 'hpricot'
  WWMD::HDOC = Hpricot
  require 'wwmd/page/html2text_hpricot'
end
83 |
84 | # EOF
85 |
--------------------------------------------------------------------------------
/examples/wwmd_example.rb:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | require 'rubygems'
3 | require 'wwmd'
4 | include WWMD
5 |
6 | $stop = lambda { Debugger.breakpoint; Debugger.catchpoint }
7 |
module WWMD
  class Page
    # Example login helper, added directly to Page instead of an outside
    # helper class (normally a mixin script is created for this).
    #
    # Fetches the base url (the main page redirects to /login), fills the
    # first form with the configured username and password and submits it.
    # Sets and returns logged_in: false when the page has no form, else
    # true only if the resulting page has no password field.
    def login
      get(opts[:base_url])

      form = get_form
      if form.nil?                      # did we actually get a form?
        puts "WARN: No login form on base page"
        self.logged_in = false
        return false
      end

      # set login form variables from config
      form.set("name", opts[:username])
      form.set("password", opts[:password])

      self.url = self.action            # submit to the form action
      submit(form)

      # naive check that we are no longer on the login page:
      # the current page must not contain any password fields
      password_fields = search("//input[@type='password']")
      self.logged_in = (password_fields.size == 0)
    end
  end
end
30 |
# parse options and load configuration file
# (the first argument left over after option parsing names the config file)
inopts = WWMDConfig.parse_opts(ARGV)
conf = ARGV[0] || "./config_example.yaml"
opts = WWMDConfig.load_config(conf)
# command line options win over values from the configuration file
inopts.each_pair { |k,v| opts[k] = v }
$opts = opts

# create our Page object and name it page
page = Page.new(opts)
page.scrape.warn = false  ;# don't complain about not overwriting scrape

# move our spider object up here
spider = page.spider

# output current configuration (the password is masked)
puts "current configuration:\n"
page.opts.each_pair { |k,v|
  if k == :password then
    puts "#{k} :: ********"
  else
    puts "#{k} :: #{v}"
  end
}
puts "\n"

# use the Helper method to login to the application
if page.opts[:use_auth] then
  page.login
  if page.logged_in? then
    puts "logged in as #{opts[:username]}"
  else
    # NOTE(review): the trailing condition repeats the test of this else branch
    puts "WARN: could not log in" if !page.logged_in?
  end
else
  # no authentication wanted: just fetch the base url
  page.get opts[:base_url]
end

# report our current location and let's drop to irb with
# our whole context complete
puts "current location: #{page.current}"
puts "enter \"irb\" to go to the console"

$stop.call
74 |
--------------------------------------------------------------------------------
/tasks/ann.rake:
--------------------------------------------------------------------------------
1 |
2 | begin
3 | require 'bones/smtp_tls'
4 | rescue LoadError
5 | require 'net/smtp'
6 | end
7 | require 'time'
8 |
# Announcement tasks: write an announcement file from the project
# settings and send it by e-mail.
namespace :ann do

  # A prerequisites task that all other tasks depend upon
  task :prereqs

  # Write the announcement file: title block, description, changes and
  # the configured readme paragraphs, followed by any extra text.
  file PROJ.ann.file do
    ann = PROJ.ann
    puts "Generating #{ann.file}"
    File.open(ann.file,'w') do |fd|
      fd.puts("#{PROJ.name} version #{PROJ.version}")
      fd.puts("    by #{Array(PROJ.authors).first}") if PROJ.authors
      fd.puts("    #{PROJ.url}") if PROJ.url.valid?
      fd.puts("    (the \"#{PROJ.release_name}\" release)") if PROJ.release_name
      fd.puts
      fd.puts("== DESCRIPTION")
      fd.puts
      fd.puts(PROJ.description)
      fd.puts
      # the first line of the changes is replaced by the section heading
      fd.puts(PROJ.changes.sub(%r/^.*$/, '== CHANGES'))
      fd.puts
      ann.paragraphs.each do |p|
        fd.puts "== #{p.upcase}"
        fd.puts
        fd.puts paragraphs_of(PROJ.readme_file, p).join("\n\n")
        fd.puts
      end
      fd.puts ann.text if ann.text
    end
  end

  desc "Create an announcement file"
  task :announcement => ['ann:prereqs', PROJ.ann.file]

  desc "Send an email announcement"
  task :email => ['ann:prereqs', PROJ.ann.file] do
    ann = PROJ.ann
    # sender: configured address, else the first author, else the project e-mail
    from = ann.email[:from] || Array(PROJ.authors).first || PROJ.email
    to   = Array(ann.email[:to])

    ### build a mail header for RFC 822
    rfc822msg =  "From: #{from}\n"
    rfc822msg << "To: #{to.join(',')}\n"
    rfc822msg << "Subject: [ANN] #{PROJ.name} #{PROJ.version}"
    rfc822msg << " (#{PROJ.release_name})" if PROJ.release_name
    rfc822msg << "\n"
    rfc822msg << "Date: #{Time.new.rfc822}\n"
    rfc822msg << "Message-Id: "
    # message id: current time as a float plus the configured domain
    rfc822msg << "<#{"%.8f" % Time.now.to_f}@#{ann.email[:domain]}>\n\n"
    rfc822msg << File.read(ann.file)

    # positional arguments for Net::SMTP.start, in this order
    params = [:server, :port, :domain, :acct, :passwd, :authtype].map do |key|
      ann.email[key]
    end

    # params[3] is the account: falls back to the project e-mail
    params[3] = PROJ.email if params[3].nil?

    # params[4] is the password: asked for on the terminal when not configured
    if params[4].nil?
      STDOUT.write "Please enter your e-mail password (#{params[3]}): "
      params[4] = STDIN.gets.chomp
    end

    ### send email
    Net::SMTP.start(*params) {|smtp| smtp.sendmail(rfc822msg, from, to)}
  end
end  # namespace :ann

desc 'Alias to ann:announcement'
task :ann => 'ann:announcement'

# the generated announcement file is removed by the clobber task
CLOBBER << PROJ.ann.file
79 |
80 | # EOF
81 |
--------------------------------------------------------------------------------
/lib/wwmd/page/reporting_helpers.rb:
--------------------------------------------------------------------------------
module WWMD
  class Page
    # status text set by the last call to page_status
    attr_accessor :status
    #:section: Reporting helper methods
    # These are methods that generate data for a parsed page

    # return text representation of page code:
    # "ERR" when the response code is above 399, "OK" otherwise.
    # The value is also stored in @status.  (This used to return nil for
    # every "OK" page because the last statement was a modifier-if.)
    #
    # override with specific statuses in helper depending on page text
    # etc to include statuses outside 200 = OK and other = ERR
    def page_status
      @status = self.response_code > 399 ? "ERR" : "OK"
    end

#    alias_method :status, :page_status#:nodoc:

    # return value of @logged_in
    def logged_in?
      return @logged_in
    end

    # return a string of flags, upper case when the page has the item
    # and lower case when it does not:
    # Ll links
    # Jj javascript includes
    # Ff forms
    # Cc comments
    def report_flags
      [
        [self.has_links?,    "L"],
        [self.has_jlinks?,   "J"],
        [self.has_form?,     "F"],
        [self.has_comments?, "C"],
      ].map { |present, flag| present ? flag : flag.downcase }.join
    end

    def has_links?;    return !@links.empty?;    end
    def has_jlinks?;   return !@jlinks.empty?;   end
    def has_form?;     return !(@forms.size < 1); end
    def has_comments?; return !@comments.empty?; end

    # return page size in bytes
    def size
      return self.body_data.size
    end

    # return md5sum for self.body_data
    def md5
      return self.body_data.md5
    end

    # does this response have SET-COOKIE headers?
    #
    # returns a FormArray with one [name, value] pair per SET-COOKIE
    # header (attributes after the first ";" are dropped); the array is
    # empty when the response sets no cookies
    def set_cookies?
      ret = FormArray.new()
      self.header_data.each do |x|
        if x[0].upcase == "SET-COOKIE"
          ret << x[1].split(";").first.split("=",2)
        end
      end
      ret
    end
    alias_method :set_cookies, :set_cookies?

    # total_time of the request
    def time
      self.total_time
    end

    # return MD5 for DOM fingerprint
    # take all tag names in page.to_s.md5
    def fingerprint
      self.all_tags.to_s.md5
    end
    alias_method :fp, :fingerprint #:nodoc:

    # alias_method for last_effective_url
    def current_url
      self.last_effective_url
    end

    alias_method :current, :current_url
    alias_method :cur,     :current_url
    alias_method :now,     :current_url

    # the last http response code
    def code
      self.response_code # .to_s
    end

  end
end
90 |
--------------------------------------------------------------------------------
/lib/wwmd/page/html2text_hpricot.rb:
--------------------------------------------------------------------------------
1 | # Geoff Davis geoff at geoffdavis.net
2 | # Wed May 2 20:08:44 EDT 2007
3 | # http://rubyforge.org/pipermail/raleigh-rb-members/2007-May/000789.html
4 | # modified by mtracy at matasano.com for WWMD
5 |
module WWMD
  # tag names whose children are rendered with no extra decoration
  InlineTags = %w[
    a abbr acronym address b bdo big cite code del dfn em font i ins kbd
    label noframes noscript q s samp small span strike strong sub sup td
    th tt u html body table
  ]
  # tag names whose rendered children are followed by a newline
  BlockTags = %w[blockquote br center dd div fieldset form h1 h2 h3 h4 h5 h6 hr p pre tr var]
  # list containers: rendered like block tags
  ListTags = %w[dir dl menu ol ul]
  # list entries: rendered as "* <text>\n"
  ItemTags = %w[li dt]
#  AsciiEquivalents = {"amp"=>"&","bull"=>"*","copy"=>"(c)","laquo"=>"<<","raquo"=>">>","ge"=> ">=","le"=>"<=","mdash"=>"-","ndash"=>"-","plusmn"=>"+/-","times"=>"x"}

#  NamedCharRegex = Regexp.new("(&("+Hpricot::NamedCharacters.keys.join("|")+");)")

  class Page
    # render one element node as text
    #
    # elements whose (lower-cased) tag name is in none of the tag lists
    # produce an empty string and their children are not visited
    def element_to_text(n)
      name = (n.etag || n.stag).name.downcase
      ends_line = BlockTags.include?(name) || ListTags.include?(name)
      is_item   = ItemTags.include?(name)
      return "" unless ends_line || is_item || InlineTags.include?(name)

      text = ""
      n.each_child { |child| text += node_to_text(child) }
      if ends_line
        text + "\n"
      elsif is_item
        "* " + text + "\n"
      else
        text
      end
    end

    # render any node as text: comments vanish, elements go through
    # element_to_text, text nodes yield their inner_text and anything
    # else is the concatenation of its children
    def node_to_text(n)
      if n.comment?
        return ""
      elsif n.elem?
        return element_to_text(n)
      elsif n.text?
        return n.inner_text
      end

      collected = ""
      begin
        n.each_child { |child| collected += node_to_text(child) }
      rescue => err
        # warn and keep whatever was collected before the failure
        putw "WARN: #{err.inspect}"
      end
      collected
    end

#    def lookup_named_char(s)
#      c = Hpricot::NamedCharacters[s[1...-1]]
#      c.chr if c
#    end

    # return the page (self.scrape.hdoc) as plain text, one non-empty
    # line per output line
    def html2text
      text = node_to_text(self.scrape.hdoc)
#      text.gsub!(NamedCharRegex){|s| "#{lookup_named_char(s)}"}
      # clean up white space
      text.gsub!("\r"," ")
      text.squeeze!(" ")
      text.strip!
      # drop blank lines and lines made up only of question marks
      kept = text.split(/\n/).map { |line| line.strip }.reject do |line|
        line == '' || line =~ /^\?+$/
      end
      kept.map { |line| "#{line}\n" }.join
    end
  end
end
77 |
--------------------------------------------------------------------------------
/lib/wwmd/page/form.rb:
--------------------------------------------------------------------------------
1 | =begin rdoc
2 | =end
module WWMD
  # == original author of hpricot_form
  #
  # Chew Choon Keat
  # http://blog.yanime.org/
  # 19 July 2006
  #
  # updated by mtracy at matasano.com for use with Nokogiri and WWMD
  #
  # Thin wrapper around a parsed form node (hdoc); any method not defined
  # here is forwarded to the wrapped node.
  class Form
    attr_accessor :hdoc
    attr_accessor :fields
    attr_accessor :formtag

    # doc:: the parsed node to wrap
    def initialize(doc)
      @hdoc = doc
      @formtag = @hdoc.search("//form")
    end

    # forward unknown messages (and any block given) to the wrapped node
    def method_missing(*args, &block)
      hdoc.send(*args, &block)
    end

    alias_method :old_fields, :fields
    # return (and memoize) a Field for every named input, named select and
    # textarea found under the wrapped node
    def fields
      if PARSER == :nokogiri
        @fields ||= (hdoc.search(".//input[@name]",".//select[@name]",".//textarea")).map { |x| Field.new(x) }
      else
        @fields ||= (hdoc.search("//input[@name]") + hdoc.search("//select[@name]") + hdoc.search("//textarea")).map { |x| Field.new(x) }
      end
    end

    # return the name attribute of every field
    def field_names
      fields.map { |x| x.get_attribute("name") }
    end

    # return the form's action attribute
    def action
      self.get_attribute("action")
    end

    # return the form's method attribute
    def type
      self.get_attribute("method")
    end

  end

  # A single form control (input, select or textarea) wrapped like a Form.
  class Field < Form
    # return the computed value (see _value), falling back to the raw
    # value attribute when nothing was computed
    def value
      computed = self._value
      computed.nil? ? self.get_attribute("value") : computed
    end

    alias_method :get_value, :value #:nodoc:
    alias_method :fvalue, :value #:nodoc:

    # return the name attribute
    def fname
      self.get_attribute('name')
    end

    # return the type attribute
    def ftype
      self.get_attribute('type')
    end

    # compute the value of this control:
    # * one selected option    => that option's value
    # * many selected options  => array of their values
    # * textarea               => its text content
    # * otherwise              => the value attribute when the control is
    #   checked or is not a radio button / checkbox, else nil
    def _value
      # selection (array)
      xpath = (PARSER == :nokogiri) ? ".//option[@selected]" : "//option[@selected]"
      ret = hdoc.search(xpath).collect { |x| x.get_attribute("value") }
      case ret.size
      when 0
        if name == "textarea"
          (PARSER == :nokogiri) ? hdoc.text : hdoc.innerHTML
        elsif hdoc.get_attribute("checked") || hdoc.get_attribute("type") !~ /radio|checkbox/
          # "!~" is required here: "!attr =~ regex" parses as
          # "(!attr) =~ regex" and never tests the type attribute
          hdoc.get_attribute("value")
        end
      when 1
        ret.first
      else
        ret
      end
    end

    # return [tag name, type, name, value]
    def to_arr
      [self.name, self.ftype, self.fname, self.fvalue]
    end

    # return a one line text description of the field
    def to_text
      "tag=#{self.name} type=#{self.ftype} name=#{self.fname} value=#{self.fvalue}"
    end

  end
end
100 |
--------------------------------------------------------------------------------
/lib/wwmd/page/parsing_convenience.rb:
--------------------------------------------------------------------------------
1 | module WWMD
2 | class Page
3 | #:section: Parsing convenience methods
4 | # methods that help parse and find information on a page including
5 | # access to forms etc.
6 |
7 | # grep for regexp and remove leading whitespace
def grep(reg)
  # iterate the body line by line explicitly: String itself is only
  # Enumerable (and so only has #grep) on Ruby 1.8
  matches = self.body_data.each_line.grep(reg)
  # each match keeps its line terminator; only leading whitespace goes
  matches.map { |i| i.gsub(/^\s+/, "") }
end
11 |
12 | # return this page's form (at index id) as a FormArray
def get_form(id=nil)
  # id defaults to the first form; nil is returned when the page has no
  # form at that index
  id ||= 0
  return nil if forms.empty? || !forms[id]
  f = forms[id]
  # a form without an action attribute falls back to the page level
  # action for the SAME form index (not always form 0)
  act = f.action || self.action(id)
  url_action = @urlparse.parse(self.cur,act).to_s
  form_type = f.type
  FormArray.new do |x|
    x.set_fields(f.fields)
    x.action = url_action
    x.type = form_type
  end
end
27 |
28 | # return the complete url to the form action on this page
def action(id=nil)
  # id defaults to the first form on the page
  form_action = self.forms[id || 0].action
  if form_action.nil? || form_action.empty?
    # no action attribute: the form posts back to the current url
    self.last_effective_url
  else
    # resolve the action against the current url
    @urlparse.parse(self.last_effective_url,form_action).to_s
  end
end
35 |
36 | # return an array of Element objects for an xpath search
def search(xpath)
  # run the query against the parsed document held by the scraper
  document = self.scrape.hdoc
  document.search(xpath)
end
40 |
41 | # return an array of inner_html for each \n"
270 | ret << "