├── lib ├── wwmd │ ├── page.rb │ ├── class_extensions.rb │ ├── viewstate │ │ ├── vs_stubs │ │ │ ├── vs_indexed_string.rb │ │ │ ├── vs_type.rb │ │ │ ├── vs_indexed_string_ref.rb │ │ │ ├── vs_read_types.rb │ │ │ ├── vs_pair.rb │ │ │ ├── vs_int_enum.rb │ │ │ ├── vs_triplet.rb │ │ │ ├── vs_binary_serialized.rb │ │ │ ├── vs_value.rb │ │ │ ├── vs_unit.rb │ │ │ ├── vs_list.rb │ │ │ ├── vs_string_formatted.rb │ │ │ ├── vs_string.rb │ │ │ ├── vs_array.rb │ │ │ ├── vs_string_array.rb │ │ │ ├── vs_stub_helpers.rb │ │ │ ├── vs_hashtable.rb │ │ │ ├── vs_hybrid_dict.rb │ │ │ ├── vs_read_value.rb │ │ │ └── vs_sparse_array.rb │ │ ├── vs_stubs.rb │ │ ├── viewstate_yaml.rb │ │ ├── viewstate.rb │ │ ├── viewstate_types.rb │ │ ├── viewstate_from_xml.rb │ │ ├── viewstate_utils.rb │ │ └── viewstate_deserializer_methods.rb │ ├── wwmd_puts.rb │ ├── class_extensions │ │ ├── extensions_external.rb │ │ ├── extensions_nilclass.rb │ │ ├── mixins_string_encoding.rb │ │ ├── extensions_encoding.rb │ │ ├── extensions_rbkb.rb │ │ └── extensions_base.rb │ ├── page │ │ ├── auth.rb │ │ ├── helpers.rb │ │ ├── inputs.rb │ │ ├── html2text_nokogiri.rb │ │ ├── constants.rb │ │ ├── reporting_helpers.rb │ │ ├── html2text_hpricot.rb │ │ ├── form.rb │ │ ├── parsing_convenience.rb │ │ ├── headers.rb │ │ ├── irb_helpers.rb │ │ ├── spider.rb │ │ ├── scrape.rb │ │ ├── page.rb │ │ └── form_array.rb │ ├── viewstate.rb │ ├── wwmd_utils.rb │ ├── wwmd_config.rb │ ├── urlparse.rb │ └── guid.rb └── wwmd.rb ├── spec ├── README ├── spider_csrf_test.spec ├── form_array.spec └── urlparse_test.spec ├── tasks ├── bones.rake ├── notes.rake ├── zentest.rake ├── test.rake ├── git.rake ├── post_load.rake ├── rdoc.rake ├── spec.rake ├── rubyforge.rake ├── ann.rake ├── gem.rake └── setup.rb ├── examples ├── config_example.yaml └── wwmd_example.rb ├── Rakefile ├── History.txt ├── README.rdoc └── wwmd.gemspec /lib/wwmd/page.rb: -------------------------------------------------------------------------------- 1 | require 'wwmd/wwmd_utils' 2 | require 'wwmd/wwmd_config' 3 | require 'wwmd/page/page' 4 | -------------------------------------------------------------------------------- /spec/README: -------------------------------------------------------------------------------- 1 | None of this was developed with unit tests 2 | 3 | These are tests written against new functionality 4 | -------------------------------------------------------------------------------- /lib/wwmd/class_extensions.rb: -------------------------------------------------------------------------------- 1 | Dir.glob(::File.join(::File.dirname(__FILE__),"class_extensions/","*.rb")).each { |rb| require rb } 2 | 3 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_indexed_string.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSIndexedString < VSStubs::VSString 3 | def serialize; super; end 4 | def to_xml; super; end 5 | end 6 | end 7 | -------------------------------------------------------------------------------- /lib/wwmd/wwmd_puts.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | attr_accessor :console 3 | attr_accessor :debug 4 | @console = false 5 | @debug = false 6 | def putd(*args); puts *args if WWMD::debug; end 7 | def putx(*args); puts *args if WWMD::console; end 8 | def putw(*args); puts *args if WWMD::console; end 9 | end 10 | -------------------------------------------------------------------------------- /lib/wwmd/class_extensions/extensions_external.rb: -------------------------------------------------------------------------------- 1 | module REXML 2 | class Element 3 | 4 | # pretty print (indent=0) to stdout or filename [fn] 5 | def pp(fn=nil) 6 | tmp = "" 7 | self.write(tmp,0) 8 | if fn 9 | tmp.write(fn) 10 | return fn 11 | else 12 | return tmp 13 | end 14 | nil 15 | end 16 | 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /lib/wwmd/page/auth.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class Page 3 | 4 | # does this request have an authenticate header? 5 | def auth? 6 | return false if self.code != 401 7 | count = 0 8 | self.header_data.each do |i| 9 | if i[0] =~ /www-authenticate/i 10 | count += 1 11 | end 12 | end 13 | return (count > 0) 14 | end 15 | 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /lib/wwmd/class_extensions/extensions_nilclass.rb: -------------------------------------------------------------------------------- 1 | # I really hate this 2 | class NilClass#:nodoc: 3 | def empty?; return true; end 4 | def size; return 0; end 5 | def to_form; return FormArray.new([]); end 6 | def clop; return nil; end 7 | def inner_html; return nil; end 8 | def get_attribute(*args); return nil; end 9 | def grep(*args); return []; end 10 | def escape(*args); return nil; end 11 | end 12 | -------------------------------------------------------------------------------- /tasks/bones.rake: -------------------------------------------------------------------------------- 1 | 2 | if HAVE_BONES 3 | 4 | namespace :bones do 5 | 6 | desc 'Show the PROJ open struct' 7 | task :debug do |t| 8 | atr = if t.application.top_level_tasks.length == 2 9 | t.application.top_level_tasks.pop 10 | end 11 | 12 | if atr then Bones::Debug.show_attr(PROJ, atr) 13 | else Bones::Debug.show PROJ end 14 | end 15 | 16 | end # namespace :bones 17 | 18 | end # HAVE_BONES 19 | 20 | # EOF 21 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_type.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSType 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | attr_reader :typeref 7 | attr_reader :typeval 8 | 9 | def initialize(typeref,typeval) 10 | @typeref = typeref 11 | @typeval = typeval 12 | end 13 | 14 | def serialize 15 | super # cheat opcode + typeref + typeval 16 | end 17 | 18 | def to_xml 19 | super 20 | end 21 | 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_indexed_string_ref.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSIndexedStringRef 3 | include VSStubHelpers 4 | 5 | attr_reader :value 6 | 7 | def initialize(ref) 8 | @value = ref 9 | end 10 | 11 | def serialize 12 | stack = super 13 | stack << self.write_int(@value) 14 | return stack 15 | end 16 | 17 | def to_xml 18 | xml = super 19 | xml.add_text(self.value.to_s) 20 | xml 21 | end 22 | 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'nokogiri' 3 | require 'rexml/document' 4 | require 'htmlentities' 5 | require 'wwmd/viewstate/viewstate_utils' 6 | module WWMD 7 | class ViewState 8 | include ViewStateUtils 9 | end 10 | end 11 | require 'wwmd/class_extensions' 12 | require 'wwmd/viewstate/viewstate' 13 | require 'wwmd/viewstate/viewstate_types' 14 | require 'wwmd/viewstate/viewstate_yaml' 15 | require 'wwmd/viewstate/viewstate_deserializer_methods' 16 | require 'wwmd/viewstate/viewstate_from_xml' 17 | require 'wwmd/viewstate/vs_stubs' 18 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_read_types.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSInt16 < VSStubs::VSReadValue; end 3 | class VSStubs::VSInt32 < VSStubs::VSReadValue; end 4 | class VSStubs::VSByte < VSStubs::VSReadValue; end 5 | class VSStubs::VSChar < VSStubs::VSReadValue; end 6 | class VSStubs::VSDateTime < VSStubs::VSReadValue; end 7 | class VSStubs::VSDouble < VSStubs::VSReadValue; end 8 | class VSStubs::VSSingle < VSStubs::VSReadValue; end 9 | class VSStubs::VSColor < VSStubs::VSReadValue; end 10 | class VSStubs::VSKnownColor < VSStubs::VSReadValue; end 11 | end 12 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_pair.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSPair 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | 7 | def initialize(obj1,obj2) 8 | @value = [] 9 | @value << obj1 10 | @value << obj2 11 | end 12 | 13 | def serialize 14 | stack = super 15 | self.value.each do |v| 16 | stack << v.serialize 17 | end 18 | return stack 19 | end 20 | 21 | def to_xml 22 | xml = super 23 | self.value.each do |v| 24 | xml.add_element(v.to_xml) 25 | end 26 | xml 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_int_enum.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSIntEnum 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | attr_reader :typeref 7 | attr_reader :typeval 8 | 9 | def initialize(typeref,typeval,index) 10 | @typeref = typeref 11 | @typeval = typeval 12 | @value = index 13 | end 14 | 15 | def serialize 16 | stack = super 17 | stack << self.write_7bit_encoded_int(self.value) 18 | end 19 | 20 | def to_xml 21 | xml = super 22 | xml.add_text(self.value.to_s) 23 | xml 24 | end 25 | 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_triplet.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSTriplet 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | 7 | def initialize(obj1,obj2,obj3) 8 | @value = [] 9 | @value << obj1 10 | @value << obj2 11 | @value << obj3 12 | end 13 | 14 | def serialize 15 | stack = super 16 | self.value.each do |v| 17 | stack << v.serialize 18 | end 19 | return stack 20 | end 21 | 22 | def to_xml 23 | xml = super 24 | self.value.each do |v| 25 | xml.add_element(v.to_xml) 26 | end 27 | xml 28 | end 29 | 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_binary_serialized.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSBinarySerialized 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | 7 | def initialize() 8 | @value = '' 9 | end 10 | 11 | def set(str) 12 | @value = str 13 | end 14 | 15 | def serialize 16 | stack = super 17 | stack << self.write_7bit_encoded_int(self.size) 18 | stack << self.value 19 | return stack 20 | end 21 | 22 | def to_xml 23 | xml = super 24 | xml.add_attribute("encoding","base64") 25 | xml.add_text(self.value.b64e) 26 | xml 27 | end 28 | 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_value.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSValue 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | 7 | # gin up all the single byte values 8 | def initialize(str) 9 | @value = str 10 | end 11 | 12 | def to_s 13 | @value.hexify 14 | end 15 | 16 | def to_sym 17 | VIEWSTATE_TYPES[opcode].to_sym 18 | end 19 | 20 | def opcode 21 | @value 22 | end 23 | 24 | def serialize 25 | super # cheat... just return opcode 26 | end 27 | 28 | def to_xml 29 | xml = super 30 | xml.add_text(self.to_sym.to_s) 31 | xml 32 | end 33 | 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_unit.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSUnit 3 | include VSStubHelpers 4 | 5 | attr_reader :dword 6 | attr_reader :word 7 | attr_reader :value 8 | 9 | def initialize(dword,word) 10 | @dword = dword 11 | @word = word 12 | @value = '' 13 | end 14 | 15 | def serialize 16 | stack = super 17 | stack << write_double(self.dword) 18 | stack << write_single(self.word) 19 | return stack 20 | end 21 | 22 | def to_xml 23 | xml = super 24 | xml.add_attribute("dword",self.dword.to_s) 25 | xml.add_attribute("word",self.word.to_s) 26 | xml 27 | end 28 | 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /examples/config_example.yaml: -------------------------------------------------------------------------------- 1 | #--- 2 | :base_url: "http://www.example.com" 3 | :header_file: "./HEADERS.default" # argv 4 | :username: "username" # argv 5 | :password: "password" # argv 6 | 7 | # opts for spider (only spider local urls) 8 | :spider_local_only: true 9 | 10 | # opts for curl object 11 | # set max_redirects and follow_location (follows 302s) 12 | :follow_location: true 13 | :max_redirects: 20 14 | 15 | # --use_proxy=host:port overrides both of these settings 16 | # to use a proxy (I use burp and so should you) 17 | :use_proxy: false 18 | :proxy_url: "localhost:8080" 19 | 20 | # cookies (where are we going to save our cookies?) 21 | :enable_cookies: true 22 | :cookiejar: "./__cookiejar" 23 | 24 | #+++ 25 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_list.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSList 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | 7 | def initialize() 8 | @value = [] 9 | end 10 | 11 | def add(obj) 12 | @value << obj 13 | end 14 | 15 | def serialize 16 | stack = super 17 | stack << self.write_7bit_encoded_int(self.size) 18 | self.value.each do |v| 19 | stack << v.serialize 20 | end 21 | return stack 22 | end 23 | 24 | def to_xml 25 | xml = super 26 | xml.add_attribute("size",self.value.size.to_s) 27 | self.value.each do |v| 28 | xml.add_element(v.to_xml) 29 | end 30 | xml 31 | end 32 | 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /lib/wwmd/wwmd_utils.rb: -------------------------------------------------------------------------------- 1 | module WWMDUtils 2 | 3 | def self.header_array_from_file(filename) 4 | ret = Hash.new 5 | File.readlines(filename).each do |line| 6 | a = line.chomp.split(/\t/,2) 7 | ret[a[0]] = a[1] 8 | end 9 | return ret 10 | end 11 | 12 | def self.ranstr(len=8,digits=false) 13 | chars = ("a".."z").to_a 14 | chars += ("0".."9").to_a if digits 15 | ret = "" 16 | 1.upto(len) { |i| ret << chars[rand(chars.size-1)] } 17 | return ret 18 | end 19 | 20 | def self.rannum(len=8,hex=false) 21 | chars = ("0".."9").to_a 22 | chars += ("A".."F").to_a if hex 23 | ret = "" 24 | 1.upto(len) { |i| ret << chars[rand(chars.size-1)] } 25 | return ret 26 | end 27 | 28 | end 29 | -------------------------------------------------------------------------------- /tasks/notes.rake: -------------------------------------------------------------------------------- 1 | 2 | if HAVE_BONES 3 | 4 | desc "Enumerate all annotations" 5 | task :notes do |t| 6 | id = if t.application.top_level_tasks.length > 1 7 | t.application.top_level_tasks.slice!(1..-1).join(' ') 8 | end 9 | Bones::AnnotationExtractor.enumerate( 10 | PROJ, PROJ.notes.tags.join('|'), id, :tag => true) 11 | end 12 | 13 | namespace :notes do 14 | PROJ.notes.tags.each do |tag| 15 | desc "Enumerate all #{tag} annotations" 16 | task tag.downcase.to_sym do |t| 17 | id = if t.application.top_level_tasks.length > 1 18 | t.application.top_level_tasks.slice!(1..-1).join(' ') 19 | end 20 | Bones::AnnotationExtractor.enumerate(PROJ, tag, id) 21 | end 22 | end 23 | end 24 | 25 | end # if HAVE_BONES 26 | 27 | # EOF 28 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_string_formatted.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSStringFormatted 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | attr_reader :typeref 7 | attr_reader :typeval 8 | 9 | def initialize(typeref,typeval,str) 10 | @typeref = typeref 11 | @typeval = typeval 12 | @value = str 13 | end 14 | 15 | def serialize 16 | stack = super 17 | stack << self.write_7bit_encoded_int(self.size) 18 | stack << self.value 19 | end 20 | 21 | def to_xml 22 | xml = super 23 | xml.add_element(VSStubs::VSString.new(self.value).to_xml) 24 | xml 25 | end 26 | 27 | def from_xml 28 | # deserialize convenience VSString properly 29 | end 30 | 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_string.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSString 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | 7 | def initialize(val) 8 | @value = val 9 | end 10 | 11 | def serialize 12 | stack = super 13 | stack << self.write_7bit_encoded_int(self.size) 14 | stack << self.value 15 | return stack 16 | end 17 | 18 | def to_xml 19 | xml = super 20 | # emit quoted-printable if we need to 21 | if self.value =~ /[^\x20-\x7e]/ 22 | # xml.add_attribute("encoding","quoted-printable") 23 | # xml.add_text(self.value.to_qp) 24 | xml.add_attribute("encoding","urlencoded") 25 | xml.add_text(self.value.escape(/[^\x20-\x7e]/)) 26 | else 27 | xml.add_text(self.value) 28 | end 29 | xml 30 | end 31 | 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # Look in the tasks/setup.rb file for the various options that can be 2 | # configured in this Rakefile. The .rake files in the tasks directory 3 | # are where the options are used. 4 | 5 | begin 6 | require 'bones' 7 | Bones.setup 8 | rescue LoadError 9 | begin 10 | load 'tasks/setup.rb' 11 | rescue LoadError 12 | raise RuntimeError, '### please install the "bones" gem ###' 13 | end 14 | end 15 | 16 | ensure_in_path 'lib' 17 | require 'wwmd' 18 | 19 | task :default => 'spec:run' 20 | 21 | PROJ.name = 'wwmd' 22 | PROJ.authors = 'Michael L. Tracy' 23 | PROJ.email = 'mtracy@matasano.com' 24 | PROJ.url = 'http://github.com/miketracy/wwmd/tree/master' 25 | PROJ.version = WWMD::VERSION 26 | #PROJ.rubyforge.name = 'wwmd' 27 | 28 | PROJ.spec.opts << '--color' 29 | 30 | depend_on 'ruby-debug' 31 | depend_on 'curb' 32 | depend_on 'nokogiri' 33 | 34 | # EOF 35 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_array.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSArray 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | attr_reader :typeref 7 | attr_reader :typeval 8 | 9 | def initialize(typeref,typeval) 10 | @typeref = typeref 11 | @typeval = typeval 12 | @value = [] 13 | end 14 | 15 | def add(obj) 16 | @value << obj 17 | end 18 | 19 | def serialize 20 | stack = super 21 | stack << self.write_7bit_encoded_int(self.value.size) 22 | self.value.each do |v| 23 | stack << v.serialize 24 | end 25 | return stack 26 | end 27 | 28 | def to_xml 29 | xml = super 30 | xml.add_attribute("size", self.value.size.to_s) 31 | self.value.each do |v| 32 | xml.add_element(v.to_xml) 33 | end 34 | xml 35 | end 36 | 37 | end 38 | end 39 | -------------------------------------------------------------------------------- /tasks/zentest.rake: -------------------------------------------------------------------------------- 1 | if HAVE_ZENTEST 2 | 3 | # -------------------------------------------------------------------------- 4 | if test(?e, PROJ.test.file) or not PROJ.test.files.to_a.empty? 5 | require 'autotest' 6 | 7 | namespace :test do 8 | task :autotest do 9 | Autotest.run 10 | end 11 | end 12 | 13 | desc "Run the autotest loop" 14 | task :autotest => 'test:autotest' 15 | 16 | end # if test 17 | 18 | # -------------------------------------------------------------------------- 19 | if HAVE_SPEC_RAKE_SPECTASK and not PROJ.spec.files.to_a.empty? 20 | require 'autotest/rspec' 21 | 22 | namespace :spec do 23 | task :autotest do 24 | load '.autotest' if test(?f, '.autotest') 25 | Autotest::Rspec.run 26 | end 27 | end 28 | 29 | desc "Run the autotest loop" 30 | task :autotest => 'spec:autotest' 31 | 32 | end # if rspec 33 | 34 | end # if HAVE_ZENTEST 35 | 36 | # EOF 37 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_string_array.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSStringArray 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | 7 | def initialize() 8 | @value = [] 9 | end 10 | 11 | def add(obj) 12 | @value << obj 13 | end 14 | 15 | def serialize 16 | stack = super 17 | stack << self.write_7bit_encoded_int(self.size) 18 | self.value.each do |v| 19 | stack << self.write_7bit_encoded_int(v.size) 20 | stack << v 21 | end 22 | return stack 23 | end 24 | 25 | def to_xml 26 | xml = super 27 | xml.add_attribute("size",self.value.size.to_s) 28 | self.value.each do |v| 29 | xml.add_element(VSStubs::VSString.new(v).to_xml) 30 | end 31 | xml 32 | end 33 | 34 | def from_xml 35 | # serliazed with VSString (for convenience) 36 | # make sure not to deserialize the opcode when you write this out 37 | end 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_stub_helpers.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | module VSStubHelpers 3 | include ViewStateUtils 4 | 5 | def to_sym 6 | self.class.to_s.split(":").last.gsub(/[A-Z]+/,'\1_\0').downcase[1..-1].gsub(/\Avs/,"").to_sym 7 | end 8 | 9 | def opcode 10 | return VIEWSTATE_TYPES.index(self.to_sym) 11 | end 12 | 13 | def size 14 | return @value.size 15 | end 16 | 17 | def serialize 18 | stack = "" 19 | stack << self.write_byte(self.opcode) 20 | if self.respond_to?(:typeref) 21 | stack << self.serialize_type(self.typeref,self.typeval) 22 | end 23 | return stack 24 | end 25 | 26 | def to_xml 27 | xml = REXML::Element.new(self.class.to_s.split(":").last) 28 | if self.respond_to?(:typeref) 29 | xml.add_attribute("typeref",self.typeref) 30 | xml.add_attribute("typeval",self.typeval) 31 | end 32 | # xml.add_attribute("size",self.size) 33 | xml 34 | end 35 | 36 | end 37 | end 38 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs.rb: -------------------------------------------------------------------------------- 1 | module WWMD::VSStubs; end 2 | require 'wwmd/viewstate/vs_stubs/vs_stub_helpers' 3 | require 'wwmd/viewstate/vs_stubs/vs_read_value' 4 | require 'wwmd/viewstate/vs_stubs/vs_read_types' 5 | require 'wwmd/viewstate/vs_stubs/vs_value' 6 | require 'wwmd/viewstate/vs_stubs/vs_array' 7 | require 'wwmd/viewstate/vs_stubs/vs_binary_serialized' 8 | require 'wwmd/viewstate/vs_stubs/vs_int_enum' 9 | require 'wwmd/viewstate/vs_stubs/vs_hashtable' 10 | require 'wwmd/viewstate/vs_stubs/vs_hybrid_dict' 11 | require 'wwmd/viewstate/vs_stubs/vs_list' 12 | require 'wwmd/viewstate/vs_stubs/vs_pair' 13 | require 'wwmd/viewstate/vs_stubs/vs_sparse_array' 14 | require 'wwmd/viewstate/vs_stubs/vs_string' 15 | require 'wwmd/viewstate/vs_stubs/vs_string_array' 16 | require 'wwmd/viewstate/vs_stubs/vs_string_formatted' 17 | require 'wwmd/viewstate/vs_stubs/vs_triplet' 18 | require 'wwmd/viewstate/vs_stubs/vs_type' 19 | require 'wwmd/viewstate/vs_stubs/vs_unit' 20 | require 'wwmd/viewstate/vs_stubs/vs_indexed_string' 21 | require 'wwmd/viewstate/vs_stubs/vs_indexed_string_ref' 22 | 23 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_hashtable.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSHashtable 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | 7 | def initialize() 8 | @value = [] 9 | end 10 | 11 | def add(obj1,obj2) 12 | @value << [obj1,obj2] 13 | end 14 | 15 | def serialize 16 | stack = super 17 | stack << self.write_7bit_encoded_int(self.size) 18 | self.value.each do |k,v| 19 | stack << k.serialize 20 | stack << v.serialize 21 | end 22 | return stack 23 | end 24 | 25 | def to_xml 26 | xml = super 27 | xml.add_attribute("size",self.value.size.to_s) 28 | self.value.each do |k,v| 29 | pair = REXML::Element.new("Pair") 30 | key = REXML::Element.new("Key") 31 | key.add_element(k.to_xml) 32 | val = REXML::Element.new("Value") 33 | val.add_element(v.to_xml) 34 | pair.add_element(key) 35 | pair.add_element(val) 36 | xml.add_element(pair) 37 | end 38 | xml 39 | end 40 | 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_hybrid_dict.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSHybridDict 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | 7 | def initialize() 8 | @value = [] 9 | end 10 | 11 | def add(obj1,obj2) 12 | @value << [obj1,obj2] 13 | end 14 | 15 | def serialize 16 | stack = super 17 | stack << self.write_7bit_encoded_int(self.size) 18 | self.value.each do |k,v| 19 | stack << k.serialize 20 | stack << v.serialize 21 | end 22 | return stack 23 | end 24 | 25 | def to_xml 26 | xml = super 27 | xml.add_attribute("size",self.value.size.to_s) 28 | self.value.each do |k,v| 29 | pair = REXML::Element.new("Pair") 30 | key = REXML::Element.new("Key") 31 | key.add_element(k.to_xml) 32 | val = REXML::Element.new("Value") 33 | val.add_element(v.to_xml) 34 | pair.add_element(key) 35 | pair.add_element(val) 36 | xml.add_element(pair) 37 | end 38 | xml 39 | end 40 | 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /spec/spider_csrf_test.spec: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'wwmd' 3 | include WWMD 4 | require 'spec' 5 | 6 | describe Page do 7 | before(:each) do 8 | @page = Page.new({:base_url => "http://localhost"}) 9 | @spider = @page.spider 10 | @spider.csrf_token = "CsRf" 11 | end 12 | 13 | it "should remove csrf tokens from visited and queued" do 14 | url = "http://localhost/foo.php?CsRf=something&bar=baz" 15 | links = ["http://localhost/q1.php?CsRf=omg&first=FIRST"] 16 | @spider.add(url,links) 17 | @spider.visited.first.should == "http://localhost/foo.php?CsRf=&bar=baz" 18 | @spider.queued.first.should == "http://localhost/q1.php?CsRf=&first=FIRST" 19 | end 20 | 21 | it "should work normally" do 22 | url = "http://localhost/foo.php?hithere=something&bar=baz" 23 | links = ["http://localhost/q1.php?hithere=omg&first=FIRST"] 24 | @spider.add(url,links) 25 | @spider.visited.first.should == "http://localhost/foo.php?hithere=something&bar=baz" 26 | @spider.queued.first.should == "http://localhost/q1.php?hithere=omg&first=FIRST" 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /tasks/test.rake: -------------------------------------------------------------------------------- 1 | 2 | if test(?e, PROJ.test.file) or not PROJ.test.files.to_a.empty? 3 | require 'rake/testtask' 4 | 5 | namespace :test do 6 | 7 | Rake::TestTask.new(:run) do |t| 8 | t.libs = PROJ.libs 9 | t.test_files = if test(?f, PROJ.test.file) then [PROJ.test.file] 10 | else PROJ.test.files end 11 | t.ruby_opts += PROJ.ruby_opts 12 | t.ruby_opts += PROJ.test.opts 13 | end 14 | 15 | if HAVE_RCOV 16 | desc 'Run rcov on the unit tests' 17 | task :rcov => :clobber_rcov do 18 | opts = PROJ.rcov.opts.dup << '-o' << PROJ.rcov.dir 19 | opts = opts.join(' ') 20 | files = if test(?f, PROJ.test.file) then [PROJ.test.file] 21 | else PROJ.test.files end 22 | files = files.join(' ') 23 | sh "#{RCOV} #{files} #{opts}" 24 | end 25 | 26 | task :clobber_rcov do 27 | rm_r 'coverage' rescue nil 28 | end 29 | end 30 | 31 | end # namespace :test 32 | 33 | desc 'Alias to test:run' 34 | task :test => 'test:run' 35 | 36 | task :clobber => 'test:clobber_rcov' if HAVE_RCOV 37 | 38 | end 39 | 40 | # EOF 41 | -------------------------------------------------------------------------------- /tasks/git.rake: -------------------------------------------------------------------------------- 1 | 2 | if HAVE_GIT 3 | 4 | namespace :git do 5 | 6 | # A prerequisites task that all other tasks depend upon 7 | task :prereqs 8 | 9 | desc 'Show tags from the Git repository' 10 | task :show_tags => 'git:prereqs' do |t| 11 | puts %x/git tag/ 12 | end 13 | 14 | desc 'Create a new tag in the Git repository' 15 | task :create_tag => 'git:prereqs' do |t| 16 | v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z' 17 | abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version 18 | 19 | tag = "%s-%s" % [PROJ.name, PROJ.version] 20 | msg = "Creating tag for #{PROJ.name} version #{PROJ.version}" 21 | 22 | puts "Creating Git tag '#{tag}'" 23 | unless system "git tag -a -m '#{msg}' #{tag}" 24 | abort "Tag creation failed" 25 | end 26 | 27 | if %x/git remote/ =~ %r/^origin\s*$/ 28 | unless system "git push origin #{tag}" 29 | abort "Could not push tag to remote Git repository" 30 | end 31 | end 32 | end 33 | 34 | end # namespace :git 35 | 36 | task 'gem:release' => 'git:create_tag' 37 | 38 | end # if HAVE_GIT 39 | 40 | # EOF 41 | -------------------------------------------------------------------------------- /lib/wwmd/page/helpers.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class Page 3 | # copy and paste from burp request windows 4 | # page object gets set with headers and url (not correct) 5 | # returns [headers,form] 6 | # form = page.from_paste 7 | 8 | def from_input(req) 9 | self.enable_cookies = false 10 | return false if not req 11 | h,b = req.chomp.split("\r\n\r\n",2) 12 | oh = h 13 | h = h.split("\r\n") 14 | m,u,p = h.shift.split(" ") 15 | return nil unless m =~ (/^(POST|GET)/) 16 | self.url = self.base_url + u 17 | self.headers_from_array(h) 18 | self.body_data = b 19 | self.set_data 20 | form = b.to_form 21 | form.action = @urlparse.parse(self.base_url, u).to_s 22 | [oh,form] 23 | end 24 | 25 | def from_file(fn) 26 | h = headers.clone 27 | ret = from_input(File.read(fn)) 28 | headers.replace(h) 29 | ret 30 | end 31 | 32 | def from_paste 33 | from_input(%x[pbpaste]) 34 | end 35 | 36 | def resp_paste 37 | self.body_data = %x[pbpaste].split("\r\n\r\n",2)[1] 38 | self.set_data 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/viewstate_yaml.rb: -------------------------------------------------------------------------------- 1 | class String 2 | # right now I have no idea why "\x0d\x0a" is getting munged in yaml transforms 3 | # something weird helped find by timur@. double up "\r" before "\n" works 4 | # this might be mac specific and break on other platforms. I don't care. 5 | # patch not for general use do not try this at home. 6 | def to_yaml( opts = {} ) 7 | YAML::quick_emit( is_complex_yaml? ? object_id : nil, opts ) do |out| 8 | if is_binary_data? 9 | out.scalar( "tag:yaml.org,2002:binary", [self].pack("m"), :literal ) 10 | elsif ( self =~ /\r\n/ ) 11 | # out.scalar( "tag:yaml.org,2002:binary", [self].pack("m"), :literal ) 12 | out.scalar( taguri, self.gsub(/\r\n/,"\r\r\n"), :quote2 ) 13 | elsif to_yaml_properties.empty? 14 | out.scalar( taguri, self, self =~ /^:/ ? :quote2 : to_yaml_style ) 15 | else 16 | out.map( taguri, to_yaml_style ) do |map| 17 | map.add( 'str', "#{self}" ) 18 | to_yaml_properties.each do |m| 19 | map.add( m, instance_variable_get( m ) ) 20 | end 21 | end 22 | end 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /lib/wwmd/class_extensions/mixins_string_encoding.rb: -------------------------------------------------------------------------------- 1 | =begin rdoc 2 | Place methods to character encodings here 3 | =end 4 | 5 | module WWMD 6 | # This is where character encodings should go as module methods 7 | # to be used as mixins for the String class 8 | module Encoding 9 | 10 | # String.to_utf7 mixin 11 | # (complete hack but it works) 12 | # 13 | # if all=true, encode all characters. 14 | # if all.class=Regexp encode only characters in the passed 15 | # regular expression else default to /[^0-9a-zA-Z]/ 16 | # 17 | # used by: 18 | # String.to_utf7 19 | # String.to_utf7! 20 | def to_utf7(all=nil) 21 | if all.kind_of?(Regexp) 22 | reg = all 23 | elsif all.kind_of?(TrueClass) 24 | reg = ESCAPE[:all] 25 | else 26 | reg = ESCAPE[:nalnum] || /[^a-zA-Z0-9]/ 27 | end 28 | putd "DEBG:" + reg.inspect 29 | ret = '' 30 | self.each_byte do |b| 31 | if b.chr.match(reg) 32 | ret += "+" + Base64.encode64(b.chr.toutf16)[0..2] + "-" 33 | else 34 | ret += b.chr 35 | end 36 | end 37 | return ret 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /tasks/post_load.rake: -------------------------------------------------------------------------------- 1 | 2 | # This file does not define any rake tasks. It is used to load some project 3 | # settings if they are not defined by the user. 4 | 5 | PROJ.exclude << ["^#{Regexp.escape(PROJ.ann.file)}$", 6 | "^#{Regexp.escape(PROJ.ignore_file)}$", 7 | "^#{Regexp.escape(PROJ.rdoc.dir)}/", 8 | "^#{Regexp.escape(PROJ.rcov.dir)}/"] 9 | 10 | flatten_arrays = lambda do |this,os| 11 | os.instance_variable_get(:@table).each do |key,val| 12 | next if key == :dependencies \ 13 | or key == :development_dependencies 14 | case val 15 | when Array; val.flatten! 16 | when OpenStruct; this.call(this,val) 17 | end 18 | end 19 | end 20 | flatten_arrays.call(flatten_arrays,PROJ) 21 | 22 | PROJ.changes ||= paragraphs_of(PROJ.history_file, 0..1).join("\n\n") 23 | 24 | PROJ.description ||= paragraphs_of(PROJ.readme_file, 'description').join("\n\n") 25 | 26 | PROJ.summary ||= PROJ.description.split('.').first 27 | 28 | PROJ.gem.files ||= manifest 29 | 30 | PROJ.gem.executables ||= PROJ.gem.files.find_all {|fn| fn =~ %r/^bin/} 31 | 32 | PROJ.rdoc.main ||= PROJ.readme_file 33 | 34 | # EOF 35 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_read_value.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSReadValue 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | 7 | def initialize(val) 8 | @value = val 9 | end 10 | 11 | def serialize 12 | stack = super 13 | case self.to_sym 14 | when :int16; stack << self.write_short(self.value) 15 | when :int32; stack << self.write_7bit_encoded_int(self.value) 16 | when :byte; stack << self.write_byte(self.value) 17 | when :char; stack << self.write_byte(self.value) 18 | when :date_time; stack << self.write_double(self.value) 19 | when :double; stack << self.write_double(self.value) 20 | when :single; stack << self.write_single(self.value) 21 | when :color; stack << self.write_int32(self.value) 22 | when :known_color; stack << self.write_7bit_encoded_int(self.value) 23 | else; raise "unimplemented #{self.to_sym}" 24 | end 25 | return stack 26 | end 27 | 28 | def to_xml 29 | xml = super 30 | xml.add_text(self.value.to_s) 31 | xml 32 | end 33 | 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /lib/wwmd/page/inputs.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class Inputs 3 | attr_accessor :elems 4 | 5 | @cobj = '' # wwmd object 6 | @elems = '' # array of elems parse out by self.new() 7 | 8 | def initialize(*args) 9 | @cobj = args.shift 10 | end 11 | 12 | def show 13 | putx @elems 14 | end 15 | 16 | # call me from Page.set_data 17 | def set 18 | @elems = [@cobj.search("//input").map,@cobj.search("//select").map].flatten 19 | end 20 | 21 | def get(attr=nil) 22 | @elems.map { |x| x[attr] }.reject { |y| y.nil? } 23 | end 24 | 25 | # 26 | # return: FormArray containing all page inputs 27 | def form 28 | ret = {} 29 | @elems.map do |x| 30 | name = x['name'] 31 | id = x['id'] 32 | next if (name.nil? && id.nil?) 33 | value = x['value'] 34 | type = x['type'] 35 | ret[name] = value 36 | ret[id] = value if ((id || name) != name) 37 | end 38 | return FormArray.new(ret) 39 | end 40 | 41 | # 42 | # return: FormArray containing get params 43 | def params 44 | return FormArray.new(@cobj.cur.clop.to_form) 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /spec/form_array.spec: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'wwmd' 3 | include WWMD 4 | require 'spec' 5 | 6 | describe FormArray do 7 | before(:each) do 8 | @form = FormArray.new 9 | end 10 | 11 | it "sets a value and reads a value" do 12 | @form["foo"] = "bar" 13 | @form["foo"].should == "bar" 14 | end 15 | 16 | it "reads from a string" do 17 | @form = "foo=bar&baz=eep&argle=bargle".to_form 18 | @form["foo"].should == "bar" 19 | @form["baz"].should == "eep" 20 | @form["argle"].should == "bargle" 21 | end 22 | 23 | it "to_get" do 24 | str = "foo=bar&baz=eep&argle=bargle" 25 | get = "?" + str 26 | @form = str.to_form 27 | @form.to_get.should == get 28 | end 29 | 30 | it "remove_nulls!" do 31 | @form["var1"] = "not null" 32 | @form["var2"] = "" 33 | @form["var3"] = nil 34 | @form.remove_nulls! 35 | @form.size.should == 1 36 | @form["var1"].should == "not null" 37 | end 38 | 39 | it "clones correctly" do 40 | @form = "foo=bar&baz=eep&argle=bargle".to_form 41 | lform = @form.clone 42 | lform["foo"] = "test" 43 | @form["foo"].should == "bar" 44 | lform["foo"].should == "test" 45 | end 46 | 47 | it "escapes characters correctly" 48 | it "unescapes characters correctly" 49 | end 50 | -------------------------------------------------------------------------------- /History.txt: -------------------------------------------------------------------------------- 1 | == 0.2.20 / 2009-08-24 2 | 3 | * convert ViewState to use StringIO 4 | * include iZsh changes to fix some issues 5 | * still lots to do and there's no time 6 | 7 | == 0.2.19 / 8 | 9 | * nothing to see here... move along quitely 10 | 11 | == 0.2.18 / 12 | 13 | * black hat special 14 | 15 | == 0.2.17 / 2009-06-22 16 | 17 | * lots happening between here and .9 18 | * viewstate refactor complete 19 | * clean up page/page.rb 20 | * cleaning up page/headers.rb 21 | * cleaning up page/scrape.rb 22 | * FormArray refactor includes the form action (full URL) 23 | * page.submit(page.get_form) 24 | * still bugs in URLParse but hunting them down throw by throw 25 | * remove broken NTLM (preserve auth header warnings) 26 | * remove WWMDConfig in favor of WWMD module methods but preserve old class for back compat 27 | * add some burp helpers 28 | * Page#from_paste (take entire request into Page and turn off cookies) 29 | * burp log parsing coming 30 | * Curb includes http_put (with header munging bug so careful) 31 | * internal monkey patch for Curb to do arbitrary verb tampering (not here yet) 32 | * add String#pbcopy 33 | * move lots of things around for clarity during refactor 34 | * refactor progressing but still unstable (2.0.16 gem including viewstate is good to go) 35 | 36 | == 0.2.9 / 2009-05-05 37 | 38 | * bonesify 39 | -------------------------------------------------------------------------------- /tasks/rdoc.rake: -------------------------------------------------------------------------------- 1 | 2 | require 'rake/rdoctask' 3 | 4 | namespace :doc do 5 | 6 | desc 'Generate RDoc documentation' 7 | Rake::RDocTask.new do |rd| 8 | rdoc = PROJ.rdoc 9 | rd.main = rdoc.main 10 | rd.rdoc_dir = rdoc.dir 11 | 12 | incl = Regexp.new(rdoc.include.join('|')) 13 | excl = Regexp.new(rdoc.exclude.join('|')) 14 | files = PROJ.gem.files.find_all do |fn| 15 | case fn 16 | when excl; false 17 | when incl; true 18 | else false end 19 | end 20 | rd.rdoc_files.push(*files) 21 | 22 | name = PROJ.name 23 | rf_name = PROJ.rubyforge.name 24 | 25 | title = "#{name}-#{PROJ.version} Documentation" 26 | title = "#{rf_name}'s " + title if rf_name.valid? and rf_name != name 27 | 28 | rd.options << "-t #{title}" 29 | rd.options.concat(rdoc.opts) 30 | end 31 | 32 | desc 'Generate ri locally for testing' 33 | task :ri => :clobber_ri do 34 | sh "#{RDOC} --ri -o ri ." 35 | end 36 | 37 | task :clobber_ri do 38 | rm_r 'ri' rescue nil 39 | end 40 | 41 | end # namespace :doc 42 | 43 | desc 'Alias to doc:rdoc' 44 | task :doc => 'doc:rdoc' 45 | 46 | desc 'Remove all build products' 47 | task :clobber => %w(doc:clobber_rdoc doc:clobber_ri) 48 | 49 | remove_desc_for_task %w(doc:clobber_rdoc) 50 | 51 | # EOF 52 | -------------------------------------------------------------------------------- /lib/wwmd/page/html2text_nokogiri.rb: -------------------------------------------------------------------------------- 1 | =begin rdoc 2 | html2text that works with Nokogiri 3 | =end 4 | module WWMD 5 | 6 | INLINETAGS = ['a','abbr','acronym','address','b','bdo','big','cite', 7 | 'code','del','dfn','em','font','i','ins','kbd','label', 8 | 'noframes','noscript','q','s','samp','small','span', 9 | 'strike','strong','sub','sup','td','th','tt','u', 10 | 'html','body','table'] 11 | BLOCKTAGS = ['blockquote','center','dd','div','fieldset','form', 12 | 'h1','h2','h3','h4','h5','h6','p','pre','tr','var',] 13 | LISTTAGS = ['dir','dl','menu','ol','ul'] 14 | ITEMTAGS = ['li','dt'] 15 | SPECIALTAGS = ['br','hr'] 16 | 17 | class Page 18 | def html2text 19 | arr = [] 20 | self.scrape.hdoc.traverse do |x| 21 | arr << [x.parent.name,x.text] if x.text? 22 | if x.elem? 23 | arr << [x.name,""] if SPECIALTAGS.include?(x.name) 24 | end 25 | end 26 | ret = "" 27 | arr.each do |name,str| 28 | (ret += "\n"; next ) if name == "br" 29 | (ret += "\n" + ("-" * 72) + "\n"; next) if name == "hr" 30 | s = str.strip 31 | if BLOCKTAGS.include?(name) or LISTTAGS.include?(name) 32 | s += "\n" 33 | elsif ITEMTAGS.include?(name) 34 | s = "* " + s + "\n" 35 | end 36 | ret += s 37 | end 38 | ret.gsub(/\n+/) { "\n" } 39 | ret.gsub(/[^\x20-\x7e,\n]/,"").gsub(/^\n/,"") 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /tasks/spec.rake: -------------------------------------------------------------------------------- 1 | 2 | if HAVE_SPEC_RAKE_SPECTASK and not PROJ.spec.files.to_a.empty? 3 | require 'spec/rake/verify_rcov' 4 | 5 | namespace :spec do 6 | 7 | desc 'Run all specs with basic output' 8 | Spec::Rake::SpecTask.new(:run) do |t| 9 | t.ruby_opts = PROJ.ruby_opts 10 | t.spec_opts = PROJ.spec.opts 11 | t.spec_files = PROJ.spec.files 12 | t.libs += PROJ.libs 13 | end 14 | 15 | desc 'Run all specs with text output' 16 | Spec::Rake::SpecTask.new(:specdoc) do |t| 17 | t.ruby_opts = PROJ.ruby_opts 18 | t.spec_opts = PROJ.spec.opts + ['--format', 'specdoc'] 19 | t.spec_files = PROJ.spec.files 20 | t.libs += PROJ.libs 21 | end 22 | 23 | if HAVE_RCOV 24 | desc 'Run all specs with RCov' 25 | Spec::Rake::SpecTask.new(:rcov) do |t| 26 | t.ruby_opts = PROJ.ruby_opts 27 | t.spec_opts = PROJ.spec.opts 28 | t.spec_files = PROJ.spec.files 29 | t.libs += PROJ.libs 30 | t.rcov = true 31 | t.rcov_dir = PROJ.rcov.dir 32 | t.rcov_opts = PROJ.rcov.opts + ['--exclude', 'spec'] 33 | end 34 | 35 | RCov::VerifyTask.new(:verify) do |t| 36 | t.threshold = PROJ.rcov.threshold 37 | t.index_html = File.join(PROJ.rcov.dir, 'index.html') 38 | t.require_exact_threshold = PROJ.rcov.threshold_exact 39 | end 40 | 41 | task :verify => :rcov 42 | remove_desc_for_task %w(spec:clobber_rcov) 43 | end 44 | 45 | end # namespace :spec 46 | 47 | desc 'Alias to spec:run' 48 | task :spec => 'spec:run' 49 | 50 | task :clobber => 'spec:clobber_rcov' if HAVE_RCOV 51 | 52 | end # if HAVE_SPEC_RAKE_SPECTASK 53 | 54 | # EOF 55 | -------------------------------------------------------------------------------- /lib/wwmd/viewstate/vs_stubs/vs_sparse_array.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class VSStubs::VSSparseArray 3 | include VSStubHelpers 4 | 5 | attr_accessor :value 6 | attr_reader :typeref 7 | attr_reader :typeval 8 | attr_reader :size 9 | attr_reader :elems 10 | 11 | def initialize(typeref,typeval,size,elems) 12 | @typeref = typeref 13 | @typeval = typeval 14 | @size = size 15 | @elems = elems 16 | @value = [] 17 | end 18 | 19 | def add(idx,obj) 20 | @value[idx] = obj 21 | end 22 | 23 | def serialize 24 | stack = super 25 | stack << self.write_7bit_encoded_int(self.size) 26 | stack << self.write_7bit_encoded_int(self.elems) 27 | self.value.each_index do |i| 28 | next if self.value[i].nil? 29 | stack << self.write_7bit_encoded_int(i) 30 | stack << self.value[i].serialize 31 | end 32 | return stack 33 | end 34 | 35 | def to_xml 36 | xml = super 37 | siz = REXML::Element.new("Size") 38 | siz.add_text(self.size.to_s) 39 | ele = REXML::Element.new("Elements") 40 | ele.add_text(self.elems.to_s) 41 | xml.add_element(siz) 42 | xml.add_element(ele) 43 | self.value.each_index do |i| 44 | next if self.value[i].nil? 45 | pair = REXML::Element.new("Pair") 46 | idx = REXML::Element.new("Index") 47 | idx.add_text(i.to_s) 48 | val = REXML::Element.new("Value") 49 | val.add_element(value[i].to_xml) 50 | pair.add_element(idx) 51 | pair.add_element(val) 52 | xml.add_element(pair) 53 | end 54 | xml 55 | end 56 | 57 | end 58 | end 59 | -------------------------------------------------------------------------------- /lib/wwmd/class_extensions/extensions_encoding.rb: -------------------------------------------------------------------------------- 1 | require 'wwmd/class_extensions/mixins_string_encoding' 2 | class String 3 | include WWMD::Encoding 4 | 5 | @@he = HTMLEntities.new 6 | 7 | # base 64 decode 8 | def b64d 9 | self.unpack("m").first 10 | end 11 | 12 | # base 64 encode 13 | def b64e 14 | [self].pack("m").gsub("\n","") 15 | end 16 | 17 | # URI.escape using defaults or passed regexp 18 | def escape(reg=nil,unicodify=false) 19 | if reg.nil? 20 | ret = URI.escape(self) 21 | elsif reg.kind_of?(Symbol) 22 | case reg 23 | when :none; return self 24 | when :default; ret = URI.escape(self) 25 | else; ret = URI.escape(self,WWMD::ESCAPE[reg]) 26 | end 27 | else 28 | ret = URI.escape(self,reg) 29 | end 30 | if unicodify 31 | ret.gsub!(/%/,"%u00") 32 | end 33 | return ret 34 | end 35 | 36 | # URI.escape 37 | def escape_url(reg=WWMD::ESCAPE[:url])#:nodoc: 38 | self.escape(reg) 39 | end 40 | 41 | def escape_xss(reg=WWMD::ESCAPE[:xss])#:nodoc: 42 | self.escape(reg) 43 | end 44 | 45 | def escape_default(reg=WWMD::ESCAPE[:default]) 46 | self.escape(reg) 47 | end 48 | # URI.escape all characters in string 49 | def escape_all#:nodoc: 50 | self.escape(/.*/) 51 | end 52 | 53 | # URI.unescape 54 | def unescape 55 | URI.unescape(self) 56 | end 57 | 58 | # html entity encode string 59 | # sym = :basic :named :decimal :hexadecimal 60 | def eencode(sym=nil) 61 | sym = :named if sym.nil? 62 | @@he.encode(self,sym) 63 | end 64 | 65 | # decode html entities in string 66 | def edecode 67 | return @@he.decode(self) 68 | end 69 | 70 | # quoted printable 71 | def to_qp 72 | [self].pack("M") 73 | end 74 | 75 | def from_qp 76 | self.unpack("M").first 77 | end 78 | 79 | end 80 | -------------------------------------------------------------------------------- /tasks/rubyforge.rake: -------------------------------------------------------------------------------- 1 | 2 | if PROJ.rubyforge.name.valid? && HAVE_RUBYFORGE 3 | 4 | require 'rubyforge' 5 | require 'rake/contrib/sshpublisher' 6 | 7 | namespace :gem do 8 | desc 'Package and upload to RubyForge' 9 | task :release => [:clobber, 'gem'] do |t| 10 | v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z' 11 | abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version 12 | pkg = "pkg/#{PROJ.gem._spec.full_name}" 13 | 14 | if $DEBUG then 15 | puts "release_id = rf.add_release #{PROJ.rubyforge.name.inspect}, #{PROJ.name.inspect}, #{PROJ.version.inspect}, \"#{pkg}.tgz\"" 16 | puts "rf.add_file #{PROJ.rubyforge.name.inspect}, #{PROJ.name.inspect}, release_id, \"#{pkg}.gem\"" 17 | end 18 | 19 | rf = RubyForge.new 20 | rf.configure rescue nil 21 | puts 'Logging in' 22 | rf.login 23 | 24 | c = rf.userconfig 25 | c['release_notes'] = PROJ.description if PROJ.description 26 | c['release_changes'] = PROJ.changes if PROJ.changes 27 | c['preformatted'] = true 28 | 29 | files = Dir.glob("#{pkg}*.*") 30 | 31 | puts "Releasing #{PROJ.name} v. #{PROJ.version}" 32 | rf.add_release PROJ.rubyforge.name, PROJ.name, PROJ.version, *files 33 | end 34 | end # namespace :gem 35 | 36 | 37 | namespace :doc do 38 | desc "Publish RDoc to RubyForge" 39 | task :release => %w(doc:clobber_rdoc doc:rdoc) do 40 | config = YAML.load( 41 | File.read(File.expand_path('~/.rubyforge/user-config.yml')) 42 | ) 43 | 44 | host = "#{config['username']}@rubyforge.org" 45 | remote_dir = "/var/www/gforge-projects/#{PROJ.rubyforge.name}/" 46 | remote_dir << PROJ.rdoc.remote_dir if PROJ.rdoc.remote_dir 47 | local_dir = PROJ.rdoc.dir 48 | 49 | Rake::SshDirPublisher.new(host, remote_dir, local_dir).upload 50 | end 51 | end # namespace :doc 52 | 53 | end # if HAVE_RUBYFORGE 54 | 55 | # EOF 56 | -------------------------------------------------------------------------------- /lib/wwmd/wwmd_config.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | 3 | class WWMDConfig#:nodoc: 4 | # for backward compat 5 | def self.load_config(file); WWMD::load_config(file); end 6 | def self.parse_opts(args); WWMD::parse_opts(args); end 7 | end 8 | 9 | def load_config(file) 10 | begin 11 | config = YAML.load_file(file) 12 | rescue => e 13 | putw "config file not found #{file}" 14 | putw e.inspect 15 | exit 16 | end 17 | return config 18 | end 19 | 20 | def parse_opts(args) 21 | inopts = Hash.new 22 | inopts[:max_redirects] = 10 23 | inopts[:timeout] = 30 24 | inopts[:scrape_warn] = false 25 | opts = OptionParser.new do |opts| 26 | # set defaults 27 | opts.on("-p", "--password PASSWORD", "Password") { |v| inopts[:password] = v } 28 | opts.on("-u", "--username USERNAME", "Username") { |v| inopts[:username] = v } 29 | opts.on("--header_file HEADER_FILE","Header file") { |v| inopts[:header_file] = v } 30 | opts.on("--base_url BASE_URL","Base url") { |v| inopts[:base_url] = v } 31 | opts.on("--use_proxy PROXY_URL", "Use proxy at url") do |v| 32 | ENV['HTTP_PROXY'] = "http://" + v.to_s 33 | inopts[:use_proxy] = true 34 | inopts[:proxy_url] = v 35 | end 36 | opts.on("--no_proxy","do not use proxy") do |v| 37 | inopts[:use_proxy] = false 38 | inopts[:proxy_url] = nil 39 | end 40 | opts.on("--use_auth","login before getting url") { |v| inopts[:use_auth] = true } 41 | opts.on("--no_auth","no login before getting url") { |v| inopts[:use_auth] = false } 42 | opts.on("--debug","debugging really doesn't work") { |v| inopts[:debug] = true } 43 | opts.on_tail("-h", "--help", "Show this message") do 44 | puts opts 45 | exit 46 | end 47 | end 48 | opts.parse!(args) 49 | return inopts 50 | end 51 | 52 | end 53 | -------------------------------------------------------------------------------- /lib/wwmd/page/constants.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | XSSFISH = "<;'\"}()[]>{" 3 | 4 | DEFAULTS = { 5 | :base_url => "", 6 | :use_auth => true, 7 | :enable_cookies => true, 8 | :cookiejar => "./__cookiejar", 9 | :follow_location => true, 10 | :max_redirects => 20, 11 | :use_proxy => false, 12 | :debug => false, 13 | :scrape_warn => true, 14 | :parse => true, 15 | :timeout => 20, 16 | } 17 | 18 | ESCAPE = { 19 | :url => /[^a-zA-Z0-9\-_%]/, 20 | :nalnum => /[^a-zA-Z0-9]/, 21 | :xss => /[^a-zA-Z0-9=?()']/, 22 | :ltgt => /[<>]/, 23 | :all => /.*/, 24 | # :b64 => /[=+\/]/, 25 | :b64 => /[^a-zA-Z0-9]/, 26 | :none => :none, 27 | :default => :default, 28 | } 29 | 30 | UA = { 31 | :mozilla => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.16) Gecko/20080702 Firefox/2.0.0.16", 32 | :moz3 => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.1) Gecko/2008070206 Firefox/3.0.1", 33 | :ie6 => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)", 34 | :ie7 => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)", 35 | :ie8 => "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)", 36 | :opera => "Opera/9.20 (Windows NT 6.0; U; en)", 37 | :safari => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.22", 38 | :safari4 => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_7; en-us) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Safari/530.17", 39 | :wwmd => "Mozilla/5.0 (compatible; WWMD #{WWMD::VERSION}; o_hai)" 40 | } 41 | 42 | DEFAULT_HEADERS = { 43 | "User-Agent" => UA[:wwmd], 44 | "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 45 | "Accept-Language" => "en-US,en;q=0.8,en-au;q=0.6,en-us;q=0.4,en;q=0.2", 46 | "Accept-Encoding" => "gzip,deflate", 47 | "Accept-Charset" => "SO-8859-1,utf-8;q=0.7,*;q=0.7", 48 | "Keep-Alive" => "300", 49 | "Connection" => "keep-alive", 50 | } 51 | 52 | HEADERS = { 53 | :default => nil, 54 | :utf7 => { 55 | "Content-Type" => "application/x-www-form-urlencoded;charset=UTF-7", 56 | "Content-Transfer-Encoding" => "7bit", 57 | }, 58 | :ajax => { 59 | "X-Requested-With" => "XMLHttpRequest", 60 | "X-Prototype-Version" => "1.5.0", 61 | }, 62 | } 63 | end 64 | -------------------------------------------------------------------------------- /lib/wwmd.rb: -------------------------------------------------------------------------------- 1 | # third-party 2 | require 'rubygems' 3 | unless self.respond_to?(:java) 4 | require 'ruby-debug' 5 | require 'curb' 6 | else 7 | # I_KNOW_I_AM_USING_AN_OLD_AND_BUGGY_VERSION_OF_LIBXML2 = true 8 | # require 'curb_ffi' 9 | # include CurbFfi 10 | end 11 | require 'yaml' 12 | require 'fileutils' 13 | require 'base64' 14 | require 'optparse' 15 | require 'digest' 16 | require 'uri' 17 | require 'htmlentities' 18 | require 'nkf' 19 | require 'rexml/document' 20 | 21 | module WWMD 22 | 23 | # :stopdoc: 24 | VERSION = "0.2.20.1" 25 | PARSER = :nokogiri # :nokogiri || :hpricot 26 | LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR 27 | PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR 28 | # :startdoc: 29 | 30 | # Returns the version string for the library. 31 | # 32 | def self.version 33 | VERSION 34 | end 35 | 36 | # Returns the library path for the module. If any arguments are given, 37 | # they will be joined to the end of the libray path using 38 | # File.join. 39 | # 40 | def self.libpath( *args ) 41 | args.empty? ? LIBPATH : ::File.join(LIBPATH, args.flatten) 42 | end 43 | 44 | # Returns the lpath for the module. If any arguments are given, 45 | # they will be joined to the end of the path using 46 | # File.join. 47 | # 48 | def self.path( *args ) 49 | args.empty? ? PATH : ::File.join(PATH, args.flatten) 50 | end 51 | 52 | # Utility method used to require all files ending in .rb that lie in the 53 | # directory below this file that has the same name as the filename passed 54 | # in. Optionally, a specific _directory_ name can be passed in such that 55 | # the _filename_ does not have to be equivalent to the directory. 56 | # 57 | def self.require_all_libs_relative_to( fname, dir = nil ) 58 | dir ||= ::File.basename(fname, '.*') 59 | search_me = ::File.expand_path( 60 | ::File.join(::File.dirname(fname), dir, '**', '*.rb')) 61 | 62 | Dir.glob(search_me).sort.each do |rb| 63 | next if rb =~ /html2text_/ 64 | require rb 65 | end 66 | end 67 | 68 | end # module WWMD 69 | 70 | WWMD.require_all_libs_relative_to(__FILE__) 71 | 72 | # special case parser 73 | 74 | if WWMD::PARSER == :nokogiri 75 | require 'nokogiri' 76 | WWMD::HDOC = Nokogiri::HTML 77 | require 'wwmd/page/html2text_nokogiri' 78 | else 79 | require 'hpricot' 80 | WWMD::HDOC = Hpricot 81 | require 'wwmd/page/html2text_hpricot' 82 | end 83 | 84 | # EOF 85 | -------------------------------------------------------------------------------- /examples/wwmd_example.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'rubygems' 3 | require 'wwmd' 4 | include WWMD 5 | 6 | $stop = lambda { Debugger.breakpoint; Debugger.catchpoint } 7 | 8 | module WWMD 9 | class Page 10 | # here we add directly to Page.login instead of creating an outside 11 | # helper class. Normally we create a mixin script for this. 12 | def login 13 | self.get(self.opts[:base_url]) ;# GET the main page redirects to /login 14 | form = self.get_form ;# get the login form 15 | if form.nil? then ;# did we actually get a form? 16 | puts "WARN: No login form on base page" 17 | return (self.logged_in = false) 18 | end 19 | form.set("name",self.opts[:username]) ;# set login form variables from config 20 | form.set("password",self.opts[:password]) 21 | self.url = self.action ;# set the url to submit to to the form action 22 | self.submit(form) ;# submit the form 23 | 24 | # perform some check to make sure we aren't still on the login page 25 | # (this naively checks to make sure we don't have any password fields on the current page 26 | self.logged_in = (self.search("//input[@type='password']").size == 0) 27 | end 28 | end 29 | end 30 | 31 | # parse options and load configuration file 32 | inopts = WWMDConfig.parse_opts(ARGV) 33 | conf = ARGV[0] || "./config_example.yaml" 34 | opts = WWMDConfig.load_config(conf) 35 | inopts.each_pair { |k,v| opts[k] = v } 36 | $opts = opts 37 | 38 | # create our Page object and name it page 39 | page = Page.new(opts) 40 | page.scrape.warn = false ;# don't complain about not overwriting scrape 41 | 42 | # move our spider object up here 43 | spider = page.spider 44 | 45 | # output current configuration 46 | puts "current configuration:\n" 47 | page.opts.each_pair { |k,v| 48 | if k == :password then 49 | puts "#{k} :: ********" 50 | else 51 | puts "#{k} :: #{v}" 52 | end 53 | } 54 | puts "\n" 55 | 56 | # use the Helper method to login to the application 57 | if page.opts[:use_auth] then 58 | page.login 59 | if page.logged_in? then 60 | puts "logged in as #{opts[:username]}" 61 | else 62 | puts "WARN: could not log in" if !page.logged_in? 63 | end 64 | else 65 | page.get opts[:base_url] 66 | end 67 | 68 | # report our current location and let's drop to irb with 69 | # our whole context complete 70 | puts "current location: #{page.current}" 71 | puts "enter \"irb\" to go to the console" 72 | 73 | $stop.call 74 | -------------------------------------------------------------------------------- /tasks/ann.rake: -------------------------------------------------------------------------------- 1 | 2 | begin 3 | require 'bones/smtp_tls' 4 | rescue LoadError 5 | require 'net/smtp' 6 | end 7 | require 'time' 8 | 9 | namespace :ann do 10 | 11 | # A prerequisites task that all other tasks depend upon 12 | task :prereqs 13 | 14 | file PROJ.ann.file do 15 | ann = PROJ.ann 16 | puts "Generating #{ann.file}" 17 | File.open(ann.file,'w') do |fd| 18 | fd.puts("#{PROJ.name} version #{PROJ.version}") 19 | fd.puts(" by #{Array(PROJ.authors).first}") if PROJ.authors 20 | fd.puts(" #{PROJ.url}") if PROJ.url.valid? 21 | fd.puts(" (the \"#{PROJ.release_name}\" release)") if PROJ.release_name 22 | fd.puts 23 | fd.puts("== DESCRIPTION") 24 | fd.puts 25 | fd.puts(PROJ.description) 26 | fd.puts 27 | fd.puts(PROJ.changes.sub(%r/^.*$/, '== CHANGES')) 28 | fd.puts 29 | ann.paragraphs.each do |p| 30 | fd.puts "== #{p.upcase}" 31 | fd.puts 32 | fd.puts paragraphs_of(PROJ.readme_file, p).join("\n\n") 33 | fd.puts 34 | end 35 | fd.puts ann.text if ann.text 36 | end 37 | end 38 | 39 | desc "Create an announcement file" 40 | task :announcement => ['ann:prereqs', PROJ.ann.file] 41 | 42 | desc "Send an email announcement" 43 | task :email => ['ann:prereqs', PROJ.ann.file] do 44 | ann = PROJ.ann 45 | from = ann.email[:from] || Array(PROJ.authors).first || PROJ.email 46 | to = Array(ann.email[:to]) 47 | 48 | ### build a mail header for RFC 822 49 | rfc822msg = "From: #{from}\n" 50 | rfc822msg << "To: #{to.join(',')}\n" 51 | rfc822msg << "Subject: [ANN] #{PROJ.name} #{PROJ.version}" 52 | rfc822msg << " (#{PROJ.release_name})" if PROJ.release_name 53 | rfc822msg << "\n" 54 | rfc822msg << "Date: #{Time.new.rfc822}\n" 55 | rfc822msg << "Message-Id: " 56 | rfc822msg << "<#{"%.8f" % Time.now.to_f}@#{ann.email[:domain]}>\n\n" 57 | rfc822msg << File.read(ann.file) 58 | 59 | params = [:server, :port, :domain, :acct, :passwd, :authtype].map do |key| 60 | ann.email[key] 61 | end 62 | 63 | params[3] = PROJ.email if params[3].nil? 64 | 65 | if params[4].nil? 66 | STDOUT.write "Please enter your e-mail password (#{params[3]}): " 67 | params[4] = STDIN.gets.chomp 68 | end 69 | 70 | ### send email 71 | Net::SMTP.start(*params) {|smtp| smtp.sendmail(rfc822msg, from, to)} 72 | end 73 | end # namespace :ann 74 | 75 | desc 'Alias to ann:announcement' 76 | task :ann => 'ann:announcement' 77 | 78 | CLOBBER << PROJ.ann.file 79 | 80 | # EOF 81 | -------------------------------------------------------------------------------- /lib/wwmd/page/reporting_helpers.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class Page 3 | attr_accessor :status 4 | #:section: Reporting helper methods 5 | # These are methods that generate data for a parsed page 6 | 7 | # return text representation of page code 8 | # 9 | # override with specific statuses in helper depending on page text 10 | # etc to include statuses outside 200 = OK and other = ERR 11 | def page_status 12 | @status = "OK" 13 | @status = "ERR" if self.response_code > 399 14 | end 15 | 16 | # alias_method :status, :page_status#:nodoc: 17 | 18 | # return value of @logged_in 19 | def logged_in? 20 | return @logged_in 21 | end 22 | 23 | # return a string of flags: 24 | # Ll links 25 | # Jj javascript includes 26 | # Ff forms 27 | # Cc comments 28 | def report_flags 29 | self.has_links? ? ret = "L" : ret = "l" 30 | self.has_jlinks? ? ret += "J" : ret += "j" 31 | self.has_form? ? ret += "F" : ret += "f" 32 | self.has_comments? ? ret += "C" : ret += "c" 33 | return ret 34 | end 35 | 36 | def has_links?; return !@links.empty?; end 37 | def has_jlinks?; return !@jlinks.empty?; end 38 | def has_form?; return !(@forms.size < 1); end 39 | def has_comments?; return !@comments.empty?; end 40 | 41 | # return page size in bytes 42 | def size 43 | return self.body_data.size 44 | end 45 | 46 | # return md5sum for self.body_data 47 | def md5 48 | return self.body_data.md5 49 | end 50 | 51 | # does this response have SET-COOKIE headers? 52 | def set_cookies? 53 | ret = FormArray.new() 54 | self.header_data.each do |x| 55 | if x[0].upcase == "SET-COOKIE" 56 | ret << x[1].split(";").first.split("=",2) 57 | end 58 | end 59 | ret 60 | end 61 | alias_method :set_cookies, :set_cookies? 62 | 63 | def time 64 | self.total_time 65 | end 66 | 67 | # return MD5 for DOM fingerprint 68 | # take all tag names in page.to_s.md5 69 | def fingerprint 70 | self.all_tags.to_s.md5 71 | end 72 | alias_method :fp, :fingerprint #:nodoc: 73 | 74 | # alias_method for last_effective_url 75 | def current_url 76 | self.last_effective_url 77 | end 78 | 79 | alias_method :current, :current_url 80 | alias_method :cur, :current_url 81 | alias_method :now, :current_url 82 | 83 | # the last http response code 84 | def code 85 | self.response_code # .to_s 86 | end 87 | 88 | end 89 | end 90 | -------------------------------------------------------------------------------- /lib/wwmd/page/html2text_hpricot.rb: -------------------------------------------------------------------------------- 1 | # Geoff Davis geoff at geoffdavis.net 2 | # Wed May 2 20:08:44 EDT 2007 3 | # http://rubyforge.org/pipermail/raleigh-rb-members/2007-May/000789.html 4 | # modified by mtracy at matasano.com for WWMD 5 | 6 | module WWMD 7 | InlineTags = ['a','abbr','acronym','address','b','bdo','big','cite','code','del','dfn','em','font','i','ins','kbd','label','noframes','noscript','q','s','samp','small','span','strike','strong','sub','sup','td','th','tt','u','html','body','table'] 8 | BlockTags = ['blockquote','br','center','dd','div','fieldset','form','h1','h2','h3', 'h4','h5','h6','hr','p','pre','tr','var',] 9 | ListTags = ['dir','dl','menu','ol','ul'] 10 | ItemTags = ['li','dt'] 11 | # AsciiEquivalents = {"amp"=>"&","bull"=>"*","copy"=>"(c)","laquo"=>"<<","raquo"=>">>","ge"=> ">=","le"=>"<=","mdash"=>"-","ndash"=>"-","plusmn"=>"+/-","times"=>"x"} 12 | 13 | # NamedCharRegex = Regexp.new("(&("+Hpricot::NamedCharacters.keys.join("|")+");)") 14 | 15 | class Page 16 | def element_to_text(n) 17 | tag = n.etag || n.stag 18 | name = tag.name.downcase 19 | s = "" 20 | is_block = BlockTags.include?(name) 21 | is_list = ListTags.include?(name) 22 | is_item = ItemTags.include?(name) 23 | is_inline = InlineTags.include?(name) 24 | if is_block or is_list or is_item or is_inline 25 | n.each_child do |c| 26 | s += node_to_text(c) 27 | end 28 | if is_block or is_list 29 | s += "\n" 30 | elsif is_item 31 | s = "* " + s + "\n" 32 | end 33 | end 34 | s 35 | end 36 | 37 | def node_to_text(n) 38 | return "" if n.comment? 39 | return element_to_text(n) if n.elem? 40 | return n.inner_text if n.text? 41 | 42 | s = "" 43 | begin 44 | n.each_child do |c| 45 | s += node_to_text(c) 46 | end 47 | rescue => e 48 | putw "WARN: #{e.inspect}" 49 | end 50 | return s 51 | end 52 | 53 | # def lookup_named_char(s) 54 | # c = Hpricot::NamedCharacters[s[1...-1]] 55 | # c.chr if c 56 | # end 57 | 58 | def html2text 59 | doc = self.scrape.hdoc 60 | text = node_to_text(doc) 61 | # text.gsub!(NamedCharRegex){|s| "#{lookup_named_char(s)}"} 62 | # clean up white space 63 | text.gsub!("\r"," ") 64 | text.squeeze!(" ") 65 | text.strip! 66 | ret = '' 67 | text.split(/\n/).each do |l| 68 | l.strip! 69 | next if l == '' 70 | next if l =~ /^\?+$/ 71 | ret += "#{l}\n" 72 | end 73 | return ret 74 | end 75 | end 76 | end 77 | -------------------------------------------------------------------------------- /lib/wwmd/page/form.rb: -------------------------------------------------------------------------------- 1 | =begin rdoc 2 | =end 3 | module WWMD 4 | # == original author of hpricot_form 5 | # 6 | # Chew Choon Keat 7 | # http://blog.yanime.org/ 8 | # 19 July 2006 9 | # 10 | # updated by mtracy at matasano.com for use with Nokogiri and WWMD 11 | # 12 | class Form 13 | attr_accessor :hdoc 14 | attr_accessor :fields 15 | attr_accessor :formtag 16 | 17 | def initialize(doc) 18 | @hdoc = doc 19 | @formtag = @hdoc.search("//form") 20 | end 21 | 22 | def method_missing(*args) 23 | hdoc.send(*args) 24 | end 25 | 26 | alias_method :old_fields, :fields 27 | def fields 28 | if PARSER == :nokogiri 29 | @fields ||= (hdoc.search(".//input[@name]",".//select[@name]",".//textarea")).map { |x| Field.new(x) } 30 | else 31 | @fields ||= (hdoc.search("//input[@name]") + hdoc.search("//select[@name]") + hdoc.search("//textarea")).map { |x| Field.new(x) } 32 | end 33 | end 34 | 35 | def field_names 36 | fields.map { |x| x.get_attribute("name") } 37 | end 38 | 39 | def action 40 | return self.get_attribute("action") 41 | end 42 | 43 | def type 44 | return self.get_attribute("method") 45 | end 46 | 47 | end 48 | 49 | class Field < Form 50 | def value 51 | self._value.nil? ? self.get_attribute("value") : self._value 52 | end 53 | 54 | alias_method :get_value, :value #:nodoc: 55 | alias_method :fvalue, :value #:nodoc: 56 | 57 | def fname 58 | self.get_attribute('name') 59 | end 60 | 61 | def ftype 62 | self.get_attribute('type') 63 | end 64 | 65 | def _value 66 | # selection (array) 67 | if PARSER == :nokogiri 68 | ret = hdoc.search(".//option[@selected]").collect { |x| x.get_attribute("value") } 69 | else 70 | ret = hdoc.search("//option[@selected]").collect { |x| x.get_attribute("value") } 71 | end 72 | case ret.size 73 | when 0 74 | if name == "textarea" 75 | if PARSER == :nokogiri 76 | hdoc.text 77 | else 78 | hdoc.innerHTML 79 | end 80 | else 81 | hdoc.get_attribute("value") if (hdoc.get_attribute("checked") || !hdoc.get_attribute("type") =~ /radio|checkbox/) 82 | end 83 | when 1 84 | ret.first 85 | else 86 | ret 87 | end 88 | end 89 | 90 | def to_arr 91 | return [self.name, self.ftype, self.fname, self.fvalue] 92 | end 93 | 94 | def to_text 95 | return "tag=#{self.name} type=#{self.ftype} name=#{self.fname} value=#{self.fvalue}" 96 | end 97 | 98 | end 99 | end 100 | -------------------------------------------------------------------------------- /lib/wwmd/page/parsing_convenience.rb: -------------------------------------------------------------------------------- 1 | module WWMD 2 | class Page 3 | #:section: Parsing convenience methods 4 | # methods that help parse and find information on a page including 5 | # access to forms etc. 6 | 7 | # grep for regexp and remove leading whitespace 8 | def grep(reg) 9 | self.body_data.grep(reg).map { |i| i.gsub(/^\s+/, "") } 10 | end 11 | 12 | # return this page's form (at index id) as a FormArray 13 | def get_form(id=nil) 14 | id = 0 if not id 15 | return nil if forms.empty? || !forms[id] 16 | f = @forms[id] 17 | action = f.action 18 | action ||= self.action 19 | url_action = @urlparse.parse(self.cur,action).to_s 20 | type = f.type 21 | FormArray.new do |x| 22 | x.set_fields(f.fields) 23 | x.action = url_action 24 | x.type = type 25 | end 26 | end 27 | 28 | # return the complete url to the form action on this page 29 | def action(id=nil) 30 | id = 0 if not id 31 | act = self.forms[id].action 32 | return self.last_effective_url if (act.nil? || act.empty?) 33 | return @urlparse.parse(self.last_effective_url,act).to_s 34 | end 35 | 36 | # return an array of Element objects for an xpath search 37 | def search(xpath) 38 | self.scrape.hdoc.search(xpath) 39 | end 40 | 41 | # return an array of inner_html for each \n" 270 | ret << "\n" 271 | return ret 272 | end 273 | 274 | # add markers for burp intruder to form 275 | def burpify(all=true) #:nodoc: 276 | ret = self.clone 277 | ret.each_index do |i| 278 | next if ret[i][0] =~ /^__/ 279 | # ret.set_value!(i,"#{ret.get_value(i)}" + "\302\247" + "\302\247") 280 | if all 281 | ret.set_value!(i,"\244" + "#{ret.get_value(i)}" + "\244") 282 | else 283 | ret.set_value!(i,"#{ret.get_value(i)}" + "\244" + "\244") 284 | end 285 | end 286 | ret.to_post.pbcopy 287 | return ret 288 | end 289 | 290 | # return md5 hash of sorted list of keys 291 | def fingerprint 292 | return (self.action.to_s + self.map { |k,v| k }.sort.to_s).md5 293 | end 294 | alias_method :fp, :fingerprint #:nodoc: 295 | 296 | def from_array(arr) 297 | self.clear 298 | arr.each { |k,v| self[k] = v } 299 | end 300 | 301 | end 302 | end 303 | --------------------------------------------------------------------------------