├── test
    ├── http_get.txt
    ├── http_post.txt
    ├── spec_urlencode.rb
    └── spec_hedge.rb
├── Rakefile
├── hedge.gemspec
├── lib
    └── hedge
    │   ├── urlencode.tt
    │   └── hedge.tt
└── README.md

/test/http_get.txt:
--------------------------------------------------------------------------------
1 | GET / HTTP/1.1
2 | Host: http://www.example.org
3 | User-Agent: Mozilla/5.0 (X11; Linux i686; rv:11.0) Gecko/20100101 Firefox/11.0
4 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
5 | Accept-Language: en-us,en;q=0.5
6 | Accept-Encoding: gzip, deflate
7 | Connection: keep-alive
8 |
9 |
--------------------------------------------------------------------------------
/test/http_post.txt:
--------------------------------------------------------------------------------
1 | POST / HTTP/1.1
2 | User-Agent: curl/7.25.0 (i486-pc-linux-gnu) libcurl/7.25.0 OpenSSL/1.0.1b zlib/1.2.6 libidn/1.24 libssh2/1.4.0 librtmp/2.3
3 | Host: http://www.example.org
4 | Accept: */*
5 | Content-Length: 35
6 | Content-Type: application/x-www-form-urlencoded
7 |
8 | param1=1&param2=2&param3=3&param1=4
9 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require 'treetop'
2 | require 'yard'
3 | require 'rake/clean'
4 |
5 | CLEAN.include('doc/', '*.gem')
6 |
7 | desc "Build the gem from gemspec"
8 | task :build do
9 |   sh 'gem build hedge.gemspec'
10 | end
11 |
12 | desc "Build and install the gem from current gemspec"
13 | task :install => [:clean, :build] do
14 |   sh 'gem install hedge*.gem'
15 | end
16 |
--------------------------------------------------------------------------------
/test/spec_urlencode.rb:
--------------------------------------------------------------------------------
1 | require 'bacon'
2 | require 'polyglot'
3 | require 'treetop'
4 |
5 | require 'hedge/urlencode.tt'
6 |
7 | describe UrlEncodeParser do
8 |
9 |   before do
10 |     @parser = UrlEncodeParser.new
11 |   end
12 |
13 |   it "should parse url-encoded parameter data" do
14 |     data = "param1=1&param2=2&param3=3&param1=4"
15 |     @parser.parse(data).content.should == {
16 |       'param1' => ['1', '4'],
17 |       'param2' => ['2'],
18 |       'param3' => ['3']
19 |     }
20 |   end
21 | end
22 |
--------------------------------------------------------------------------------
/hedge.gemspec:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | Gem::Specification.new do |s|
4 |   s.name = %q{hedge}
5 |   s.version = "0.0.3"
6 |
7 |   s.authors = ["Patrick Hof"]
8 |   s.date = %q{2012-06-02}
9 |   s.email = %q{courts@offensivethinking.org}
10 |   s.files = %w(lib/hedge/hedge.tt lib/hedge/urlencode.tt)
11 |   s.homepage = %q{http://www.offensivethinking.org}
12 |   s.require_paths = ["lib"]
13 |   s.summary = %q{A simple and incomplete HTTP request parser}
14 |   s.add_dependency('treetop', '>= 1.4.9')
15 | end
16 |
--------------------------------------------------------------------------------
/lib/hedge/urlencode.tt:
--------------------------------------------------------------------------------
1 | # Simple parser for url-encoded data.
2 | #
3 | # This software is licensed under the Creative
4 | # Commons CC0 1.0 Universal License.
5 | # To view a copy of this license, visit
6 | # http://creativecommons.org/publicdomain/zero/1.0/legalcode
7 | #
8 | # @author Patrick Hof
9 |
10 | grammar UrlEncode
11 |
12 |   # One or more key=value pairs, optionally followed by a single line break.
13 |   rule urlencoded
14 |     param:param+ delimiter?
15 |     {
16 |       # Collects the parsed pairs into a Hash that maps each key to the
17 |       # Array of its values in order of appearance, so a repeated key
18 |       # accumulates values instead of overwriting them.
19 |       #
20 |       # @return [Hash] key String => Array of value Strings
21 |       def content
22 |         body = {}
23 |         param.elements.each do |el_node|
24 |           key, value = el_node.content
25 |           (body[key] ||= []) << value
26 |         end
27 |         body
28 |       end
29 |     }
30 |   end
31 |
32 |   # A single key=value pair. The value may be empty and the trailing '&'
33 |   # separator is optional.
34 |   rule param
35 |     key:param_key '=' val:(param_val / '') '&'?
36 |     {
37 |       # @return [Array] the key and value text exactly as matched
38 |       def content
39 |         [key.text_value, val.text_value]
40 |       end
41 |     }
42 |   end
43 |
44 |   rule param_val
45 |     param_key
46 |   end
47 |
48 |   # Alphanumerics, characters commonly left unescaped in url-encoded data
49 |   # (- _ . ~ *), '%' for percent-escapes and '+' for encoded spaces.
50 |   # FIXME Lenient clients may still send other characters unescaped.
51 |   rule param_key
52 |     [a-zA-Z0-9\-%+_.~*]+
53 |   end
54 |
55 |   rule delimiter
56 |     "\n" / "\r\n"
57 |   end
58 |
59 | end
60 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | hedge
2 | =====
3 |
4 | Author: Patrick Hof
5 | License: [CC0 1.0 Universal License](http://creativecommons.org/publicdomain/zero/1.0/legalcode)
6 |
7 | Download: git clone git://github.com/courts/hedge.git
8 |
9 | hedge is a simple PEG parser written in Treetop to parse HTTP requests into a
10 | data structure. It does not however implement the whole HTTP specification, so
11 | it is rather incomplete. It's just a little side project that might evolve into
12 | something serious one day.
13 |
14 | Why the name?
15 | -------------
16 | It's tough to find a name for an HTTP parser, because there are already so many
17 | of them. The obvious 'httpparser' just leads to more confusion, so I chose a
18 | name that came to my mind when thinking about trees (because of treetop) and
19 | something with 'h' in it. It also didn't return anything when running 'gem
20 | search -r hedge'.
21 |
22 | Examples
23 | --------
24 | ```ruby
25 | require 'polyglot'
26 | require 'treetop'
27 | require 'hedge/hedge.tt'
28 |
29 | HedgeReqParser.new().parse(http_request)
30 | ```
31 |
32 | See spec_hedge.rb for more specific examples.
33 | 34 | RubyGems 35 | -------- 36 | 37 | A gemspec file is included, so you can build and install hedge as a gem with: 38 | 39 | gem build hedge.gemspec 40 | gem install hedge-x.x.x.gem 41 | -------------------------------------------------------------------------------- /test/spec_hedge.rb: -------------------------------------------------------------------------------- 1 | require 'bacon' 2 | require 'polyglot' 3 | require 'treetop' 4 | 5 | require 'hedge/hedge.tt' 6 | 7 | describe HedgeReqParser do 8 | 9 | before do 10 | @parser = HedgeReqParser.new 11 | end 12 | 13 | it "should parse an HTTP GET request" do 14 | res = @parser.parse(File.open('http_get.txt').read()) 15 | res.content.should == { 16 | :verb => "GET", 17 | :url => "/", 18 | :version => "HTTP/1.1", 19 | :headers => { 20 | "Host" => "http://www.example.org", 21 | "User-Agent" => "Mozilla/5.0 (X11; Linux i686; rv:11.0) Gecko/20100101 Firefox/11.0", 22 | "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 23 | "Accept-Language" => "en-us,en;q=0.5", 24 | "Accept-Encoding" => "gzip, deflate", 25 | "Connection" => "keep-alive" 26 | }, 27 | :body => "" 28 | } 29 | end 30 | 31 | it "should parse an HTTP POST request" do 32 | res = @parser.parse(File.open('http_post.txt').read()) 33 | unless res 34 | puts @parser.failure_reason 35 | exit 36 | end 37 | res.content[:verb].should == "POST" 38 | res.content[:body].should == "param1=1¶m2=2¶m3=3¶m1=4" 39 | res.content[:body_hash].should == { 40 | 'param1' => ['1', '4'], 41 | 'param2' => ['2'], 42 | 'param3' => ['3'] 43 | } 44 | end 45 | 46 | end 47 | -------------------------------------------------------------------------------- /lib/hedge/hedge.tt: -------------------------------------------------------------------------------- 1 | # Simple and incomplete HTTP request parser. 2 | # 3 | # This software is licensed under the Creative 4 | # Commons CC0 1.0 Universal License. 
5 | # To view a copy of this license, visit
6 | # http://creativecommons.org/publicdomain/zero/1.0/legalcode
7 | #
8 | # @author Patrick Hof
9 |
10 | require 'polyglot'
11 | require 'treetop'
12 |
13 | require 'hedge/urlencode.tt'
14 |
15 | grammar HedgeReq
16 |
17 |   # A complete request: request line, header block, empty line, body.
18 |   rule request
19 |     req:req delimiter h:headers delimiter b:body
20 |     {
21 |       # Builds the result Hash with the keys :verb, :url, :version,
22 |       # :headers and :body. When the body is not empty and the
23 |       # Content-Type header names application/x-www-form-urlencoded,
24 |       # :body_hash additionally holds the parameters parsed by
25 |       # UrlEncodeParser; the key is left out if the body does not
26 |       # parse as url-encoded data.
27 |       #
28 |       # @return [Hash]
29 |       def content
30 |         res = req.content
31 |         res[:headers] = h.content
32 |         res[:body] = b.content
33 |         return res if res[:body].empty?
34 |
35 |         # Compare the media type only: the header may be missing (nil)
36 |         # or carry parameters such as "; charset=UTF-8".
37 |         ctype = res[:headers]["Content-Type"].to_s
38 |         media_type = ctype.split(';').first.to_s.strip.downcase
39 |         if media_type == 'application/x-www-form-urlencoded'
40 |           # parse returns nil when the body is not valid url-encoded data
41 |           parsed = UrlEncodeParser.new.parse(res[:body])
42 |           res[:body_hash] = parsed.content if parsed
43 |         end
44 |         res
45 |       end
46 |     }
47 |   end
48 |
49 |   # Request line, e.g. "GET / HTTP/1.1" (without the line break).
50 |   rule req
51 |     verb:verb space url:url space ver:version
52 |     {
53 |       # @return [Hash] the :verb, :url and :version text of the request line
54 |       def content
55 |         {
56 |           :verb => verb.text_value,
57 |           :url => url.text_value,
58 |           :version => ver.text_value
59 |         }
60 |       end
61 |     }
62 |   end
63 |
64 |   rule verb
65 |     'GET' / 'POST' / 'PUT' / 'DELETE' / 'HEAD' / 'OPTIONS' / 'TRACE' / 'CONNECT' / 'PATCH'
66 |   end
67 |
68 |   # Request target: one or more path or query-string characters. The
69 |   # class is deliberately small and not a complete URL grammar.
70 |   rule url
71 |     [a-zA-Z0-9/%:.?&=_~+;,@\-]+
72 |   end
73 |
74 |   rule version
75 |     'HTTP/1.0' / 'HTTP/1.1'
76 |   end
77 |
78 |   # Zero or more header lines.
79 |   rule headers
80 |     header*
81 |     {
82 |       # @return [Hash] header name => header value; a repeated name keeps
83 |       #   the value of its last occurrence
84 |       def content
85 |         h = {}
86 |         elements.each do |el|
87 |           name, value = el.content
88 |           h[name] = value
89 |         end
90 |         h
91 |       end
92 |     }
93 |   end
94 |
95 |   # One "Name: value" line including its terminating line break.
96 |   rule header
97 |     key:h_key ':' space val:h_val delimiter
98 |     {
99 |       # @return [Array] the header name and value text
100 |       def content
101 |         [key.text_value, val.text_value]
102 |       end
103 |     }
104 |   end
105 |
106 |   rule h_key
107 |     [a-zA-Z0-9\-]+
108 |   end
109 |
110 |   # Everything up to, but not including, the end of the line.
111 |   rule h_val
112 |     (!"\r" !"\n" .)*
113 |   end
114 |
115 |   # The rest of the input.
116 |   rule body
117 |     .*
118 |     {
119 |       # @return [String] the body text without one trailing line break
120 |       def content
121 |         text_value.chomp
122 |       end
123 |     }
124 |   end
125 |
126 |   rule delimiter
127 |     "\n" / "\r\n"
128 |   end
129 |
130 |   rule space
131 |     [ \t]+
132 |   end
133 |
134 | end
135 |
--------------------------------------------------------------------------------