├── .gitignore ├── Changes ├── README ├── TODO ├── dist.ini ├── eg ├── README ├── adblock.pl ├── anonymiser.pl ├── ayb.pl ├── bork.pl ├── dragon.pl ├── flv.pl ├── fudd.pl ├── https.pl ├── javascript.pl ├── js.pl ├── leet.pl ├── logger.pl ├── outline.pl ├── pdf.pl ├── perlmonks.pl ├── post.pl ├── proxy-auth.pl ├── proxy.pl ├── rainbow.pl ├── rfc.pl ├── rot13.pl ├── switch.pl ├── tracker.pl ├── trim.pl └── yahoogroups.pl ├── html ├── .htaccess ├── favicon.ico ├── http-proxy.css ├── index.html ├── robots.txt └── talks │ ├── index.html │ └── ye2004 │ ├── html-syntax.css │ ├── index.html │ ├── perltidy.css │ ├── podpoint.css │ ├── slide001.html │ ├── slide002.html │ ├── slide003.html │ ├── slide004.html │ ├── slide005.html │ ├── slide006.html │ ├── slide007.html │ ├── slide008.html │ ├── slide009.html │ ├── slide010.html │ ├── slide011.html │ ├── slide012.html │ ├── slide013.html │ ├── slide014.html │ ├── slide015.html │ ├── slide016.html │ ├── slide017.html │ ├── slide018.html │ ├── slide019.html │ ├── slide020.html │ ├── slide021.html │ ├── slide022.html │ ├── slide023.html │ ├── slide024.html │ ├── slide025.html │ ├── slide026.html │ ├── slide027.html │ ├── slide028.html │ ├── slide029.html │ ├── slide030.html │ ├── slide031.html │ ├── slide032.html │ ├── slide033.html │ ├── slide034.html │ ├── slide035.html │ ├── slide036.html │ ├── slide037.html │ ├── slide038.html │ └── wip.png ├── lib └── HTTP │ ├── Proxy.pm │ └── Proxy │ ├── BodyFilter.pm │ ├── BodyFilter │ ├── complete.pm │ ├── htmlparser.pm │ ├── htmltext.pm │ ├── lines.pm │ ├── save.pm │ ├── simple.pm │ └── tags.pm │ ├── Engine.pm │ ├── Engine │ ├── Legacy.pm │ ├── NoFork.pm │ ├── ScoreBoard.pm │ └── Threaded.pm │ ├── FilterStack.pm │ ├── HeaderFilter.pm │ └── HeaderFilter │ ├── simple.pm │ └── standard.pm └── t ├── 00basic.t ├── 05new.t ├── 10init.t ├── 11log.t ├── 15accessors.t ├── 15deprecated.t ├── 16stash.t ├── 17fstack.t ├── 18engine.t ├── 20dummy.t ├── 20keepalive.t ├── 22http.t ├── 22transparent.t ├── 23connect.t ├── 23https.t ├── 40push_filters.t ├── 41filters.t ├── 42will_modify.t ├── 50hopbyhop.t ├── 50standard.t ├── 50via.t ├── 51simple.t ├── 51simple2.t ├── 61simple.t ├── 61simple2.t ├── 64htmltext.t ├── 64lines.t ├── 64tags.t ├── 66htmlparser.t ├── 67complete.t ├── 67save.t ├── 71rot13.t ├── 90httpstatus.t ├── README ├── lib └── ProxyUtils.pm └── test.html /.gitignore: -------------------------------------------------------------------------------- 1 | *.bak 2 | *.old 3 | *.tmp 4 | *.tar.gz 5 | *~ 6 | .build 7 | blib 8 | cover_db 9 | HTTP-Proxy-* 10 | mess 11 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | HTTP::Proxy 2 | ----------- 3 | 4 | This module is a pure Perl HTTP proxy. 5 | 6 | Its main use should be to record and/or modify web sessions, so as to 7 | help users create web robots, web testing suites, as well as proxy 8 | systems than can transparently alter the requests to and answers from 9 | an origin server. 10 | 11 | The eg/ directory holds a few examples. See eg/README for details. 12 | 13 | There is also a t/README file that explains the tests strategy. 14 | 15 | !WARNING!WARNING!WARNING!WARNING!WARNING!WARNING!WARNING!WARNING! 16 | 17 | The way the filters are implemented has changed in version 0.10 18 | of HTTP::Proxy. You can now play with two dedicated filter classes 19 | and notice slight changes in the HTTP::Proxy interface. 20 | 21 | !WARNING!WARNING!WARNING!WARNING!WARNING!WARNING!WARNING!WARNING! 22 | 23 | Recommended order for reading the documentation: 24 | 1) HTTP::Proxy 25 | 2) HTTP::Proxy::HeaderFilter and HTTP::Proxy::BodyFilter 26 | 3) included standard filter classes and code examples in eg/ 27 | 28 | -------------------------------------------------------------------------------- /dist.ini: -------------------------------------------------------------------------------- 1 | ; the basics 2 | name = HTTP-Proxy 3 | author = Philippe Bruhat (BooK) 4 | license = Perl_5 5 | copyright_holder = Philippe Bruhat (BooK) 6 | ; copyright_year = 2002-2019 7 | 8 | ; file modifiers 9 | [PkgVersion] 10 | [PodVersion] 11 | 12 | ; file generators 13 | [ManifestSkip] 14 | [Manifest] 15 | [License] 16 | [MakeMaker] 17 | 18 | [PruneCruft] 19 | [PruneFiles] 20 | filename = setup 21 | filename = TODO 22 | match = \.patch$ 23 | match = mess/.* 24 | match = cover_db 25 | match = html/.* 26 | 27 | [GatherDir] 28 | 29 | ; metadata 30 | [MetaYAML] 31 | [MetaJSON] 32 | [AutoPrereqs] 33 | 34 | [Prereqs] 35 | perl = 5.008 36 | 37 | [Prereqs / BuildRequires] 38 | Test::CPAN::Meta = 39 | Test::Pod = 40 | Test::Pod::Coverage = 41 | Pod::Coverage::TrustPod = 42 | 43 | [Prereqs / TestRequires] 44 | HTML::Parser = 3 45 | Test::More = 0.88 46 | 47 | [ExecDir] 48 | [ShareDir] 49 | 50 | [Keywords] 51 | keywords = http proxy 52 | 53 | [MetaResources] 54 | repository.web = http://github.com/book/HTTP-Proxy 55 | repository.url = http://github.com/book/HTTP-Proxy.git 56 | repository.type = git 57 | bugtracker.web = http://rt.cpan.org/NoAuth/Bugs.html?Dist=HTTP-Proxy 58 | bugtracker.mailto = bug-http-proxy@rt.cpan.org 59 | 60 | [MetaProvides::Package] 61 | 62 | [Meta::Contributors] 63 | contributor = e477 64 | contributor = Vincenzo Buttazzo 65 | contributor = Slaven Rezic 66 | contributor = Tom Hukins 67 | contributor = Salve J. Nilsen 68 | contributor = Masahiro Nagano 69 | contributor = Ashley Pond V 70 | contributor = Angelos Karageorgiou 71 | contributor = Gregor Herrmann 72 | contributor = Maurice Aubrey 73 | contributor = Marek Rouchal 74 | contributor = Jimbo 75 | contributor = Roland Stigge 76 | contributor = Gunnar Wolf 77 | contributor = Matsuno Tokuhiro 78 | contributor = Ken Williams 79 | contributor = Max Maischein 80 | contributor = Mark Tilford 81 | contributor = Chris Dolan 82 | contributor = Randal L. Schwartz 83 | contributor = Simon Cozens 84 | contributor = Christian Laursen 85 | contributor = Emmanuel Di Prétoro 86 | contributor = Mathieu Arnold 87 | contributor = Paul Makepeace 88 | contributor = Martin Zdila 89 | contributor = Jim Cromie 90 | contributor = Stéphane Payrard 91 | contributor = David Landgren 92 | contributor = Éric Cholet 93 | 94 | ; tests 95 | [MetaTests] 96 | [ExtraTests] 97 | [Test::ReportPrereqs] 98 | [Test::Compile] 99 | [PodSyntaxTests] 100 | [PodCoverageTests] 101 | 102 | ; release 103 | [NextRelease] 104 | format = %v %{yyyy-MM-dd}d %P 105 | 106 | [Git::NextVersion] 107 | 108 | [TestRelease] 109 | [ConfirmRelease] 110 | [UploadToCPAN] 111 | 112 | ; git 113 | [Git::Check] 114 | [Git::Commit] 115 | commit_msg = Changes for version %v 116 | changelog = Changes 117 | 118 | [Git::Tag] 119 | tag_format = v%v 120 | tag_message = %N v%v 121 | 122 | [Git::Push] 123 | push_to = origin 124 | push_to = github 125 | -------------------------------------------------------------------------------- /eg/adblock.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use strict; 3 | use HTTP::Proxy qw( :log ); 4 | use HTTP::Proxy::HeaderFilter::simple; 5 | use vars qw( $re ); 6 | 7 | # this is a very simple ad blocker 8 | # a full-fledged ad blocker should be a module 9 | 10 | # this dot is *not* a web bug ;-) 11 | my $no = HTTP::Response->new( 200 ); 12 | $no->content_type('text/plain'); 13 | $no->content('.'); 14 | 15 | my $filter = HTTP::Proxy::HeaderFilter::simple->new( sub { 16 | my ( $self, $headers, $message ) = @_; 17 | $self->proxy->response( $no ) if $message->uri->host =~ /$re/o; 18 | } ); 19 | 20 | my $proxy = HTTP::Proxy->new( @ARGV ); 21 | $proxy->push_filter( request => $filter ); 22 | $proxy->start; 23 | 24 | # a short and basic list 25 | BEGIN { 26 | $re = join '|', map { quotemeta } qw( 27 | ads.wanadooregie.com 28 | cybermonitor.com 29 | doubleclick.com 30 | adfu.blockstackers.com 31 | bannerswap.com 32 | click2net.com 33 | clickxchange.com 34 | dimeclicks.com 35 | fastclick.net 36 | mediacharger.com 37 | mediaplex.com 38 | myaffiliateprogram.com 39 | netads.hotwired.com 40 | valueclick.com 41 | ); 42 | } 43 | 44 | -------------------------------------------------------------------------------- /eg/anonymiser.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | # yeah, I know, I write UK English ;-) 3 | use HTTP::Proxy qw( :log ); 4 | use HTTP::Proxy::HeaderFilter::simple; 5 | use strict; 6 | 7 | # a very simple proxy 8 | my $proxy = HTTP::Proxy->new( @ARGV ); 9 | 10 | # the anonymising filter 11 | $proxy->push_filter( 12 | mime => undef, 13 | request => HTTP::Proxy::HeaderFilter::simple->new( 14 | sub { $_[1]->remove_header(qw( User-Agent From Referer Cookie Cookie2 )) } 15 | ), 16 | response => HTTP::Proxy::HeaderFilter::simple->new( 17 | sub { $_[1]->remove_header(qw( Set-Cookie Set-Cookie2 )) } 18 | ) 19 | ); 20 | 21 | $proxy->start; 22 | -------------------------------------------------------------------------------- /eg/ayb.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy qw( :log ); 3 | use HTTP::Proxy::BodyFilter::htmlparser; 4 | use HTML::Parser; 5 | use strict; 6 | 7 | # the proxy 8 | my $proxy = HTTP::Proxy->new( @ARGV ); 9 | 10 | # all your base... 11 | my @ayb = grep { !/^$/ } split/$/m, << 'AYB'; 12 | In A.D. 2101 13 | War was beginning. 14 | What happen ? 15 | Somebody set up us the bomb. 16 | We get signal. 17 | What ! 18 | Main screen turn on. 19 | It's You !! 20 | How are you gentlemen !! 21 | All your base are belong to us. 22 | You are on the way to destruction. 23 | What you say !! 24 | You have no chance to survive make your time. 25 | HA HA HA HA .... 26 | Take off every 'zig' !! 27 | You know what you doing. 28 | Move 'zig'. 29 | For great justice. 30 | AYB 31 | 32 | # the AYB parser 33 | # replaces heading content with the AYB text 34 | my $parser = HTML::Parser->new( api_version => 3 ); 35 | $parser->handler( 36 | start_document => sub { 37 | my $self = shift; 38 | $self->{ayb} = 0; 39 | $self->{i} = int rand @ayb; 40 | }, 41 | "self" 42 | ); 43 | 44 | $parser->handler( 45 | start => sub { 46 | my ( $self, $tag, $attr, $text ) = @_; 47 | $self->{ayb} = 1 if $tag =~ /^h\d/; 48 | if( $tag eq 'img' ) { 49 | $attr->{src} = 'http://home.uchicago.edu/~obmontoy/cats.jpg'; 50 | $text = "<$tag " 51 | . join(' ', map { qq($_="$attr->{$_}") } keys %$attr ) 52 | . ">"; 53 | } 54 | $self->{output} .= $text; 55 | }, 56 | "self,tagname,attr,text" 57 | ); 58 | 59 | $parser->handler( 60 | end => sub { 61 | my ( $self, $tag, $text ) = @_; 62 | if( $tag =~ /^h\d/ ) { 63 | $self->{ayb} = 0; 64 | } 65 | $self->{output} .= $text; 66 | }, 67 | "self,tagname,text" 68 | ); 69 | 70 | $parser->handler( 71 | default => sub { 72 | my ( $self, $text ) = @_; 73 | $self->{output} .= $self->{ayb} ? $ayb[($self->{i} += 1 ) %= @ayb] : $text; 74 | }, 75 | "self,text" 76 | ); 77 | 78 | $proxy->push_filter( 79 | mime => 'text/html', 80 | response => HTTP::Proxy::BodyFilter::htmlparser->new( $parser, rw => 1 ), 81 | ); 82 | 83 | $proxy->start; 84 | -------------------------------------------------------------------------------- /eg/bork.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/book/HTTP-Proxy/7874e910b9d5ffaf55afc106a3e080149879f50b/eg/bork.pl -------------------------------------------------------------------------------- /eg/dragon.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy; 3 | use HTTP::Proxy::HeaderFilter::simple; 4 | use HTTP::Proxy::BodyFilter::simple; 5 | use HTTP::Proxy::BodyFilter::complete; 6 | use MIME::Base64; 7 | use Fcntl ':flock'; 8 | use strict; 9 | 10 | # the proxy 11 | my $proxy = HTTP::Proxy->new( @ARGV ); 12 | 13 | # the status page: 14 | # - auto-refresh (quickly at first, then more slowly) 15 | # - count the number of games and modify the title 16 | my $seen_title; 17 | $proxy->push_filter( 18 | host => 'www.dragongoserver.net', 19 | path => '^/status.php', 20 | # auto-refresh 21 | response => HTTP::Proxy::HeaderFilter::simple->new( 22 | sub { 23 | my ( $self, $headers, $response ) = @_; 24 | ($response->request->uri->query || '') =~ /reload=(\d+)/; 25 | my $n = ($1 || 0) + 1; 26 | my $delay = $n < 5 ? 30 : $n < 15 ? 60 : $n < 25 ? 300 : 3600; 27 | $headers->push_header( Refresh => "$delay;url=" 28 | . $response->request->uri->path 29 | . "?reload=$n" ); 30 | } 31 | ), 32 | # count games 33 | response => HTTP::Proxy::BodyFilter::complete->new(), 34 | response => HTTP::Proxy::BodyFilter::simple->new( 35 | filter => sub { 36 | my ( $self, $dataref, $message, $protocol, $buffer ) = @_; 37 | next if ! $$dataref; 38 | 39 | # count the games and change the title 40 | my $n = 0; $n++ while $$dataref =~ /game\.php\?gid=\d+/g; 41 | my $s = $n > 1 ? "s" : ""; $n ||= "No"; 42 | $$dataref =~ s!.*?!$n go game$s pending!s; 43 | }, 44 | ), 45 | ); 46 | 47 | # the game page: 48 | # - remove the Message: textarea 49 | # - add a link to make it appear when needed 50 | $proxy->push_filter( 51 | host => 'www.dragongoserver.net', 52 | path => '^/game.php', 53 | response => HTTP::Proxy::BodyFilter::complete->new(), 54 | response => HTTP::Proxy::BodyFilter::simple->new( 55 | sub { 56 | my $msg = '&msg=yes'; 57 | my $uri = $_[2]->request->uri; 58 | if( $uri =~ s/$msg//o ) { $msg = ''; } 59 | else { ${$_[1]} =~ s|()||; } 60 | ${$_[1]} =~ s|(Message:)|$1|; 61 | } 62 | ) 63 | ); 64 | 65 | $proxy->start; 66 | 67 | -------------------------------------------------------------------------------- /eg/flv.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use HTTP::Proxy; 5 | use HTTP::Proxy::BodyFilter::save; 6 | use Digest::MD5 qw( md5_hex); 7 | use POSIX qw( strftime ); 8 | 9 | my $proxy = HTTP::Proxy->new(@ARGV); 10 | 11 | # a filter to save FLV files somewhere 12 | my $flv_filter = HTTP::Proxy::BodyFilter::save->new( 13 | filename => sub { 14 | my ($message) = @_; 15 | my $uri = $message->request->uri; 16 | 17 | # get the id, or fallback to some md5 hash 18 | my ($id) = ( $uri->query || '' ) =~ /id=([^&;]+)/i; 19 | $id = md5_hex($uri) unless $id; 20 | 21 | # compute the filename (including the base site name) 22 | my ($host) = $uri->host =~ /([^.]+\.[^.]+)$/; 23 | my $file = strftime "flv/%Y-%m-%d/${host}_$id.flv", localtime; 24 | 25 | # ignore it if we already have it 26 | return if -e $file && -s $file == $message->content_length; 27 | 28 | # otherwise, save 29 | return $file; 30 | }, 31 | ); 32 | 33 | # push the filter for all MIME types we want to catch 34 | for my $mime (qw( video/flv video/x-flv )) { 35 | $proxy->push_filter( 36 | mime => $mime, 37 | response => $flv_filter, 38 | ); 39 | } 40 | 41 | $proxy->start; 42 | 43 | -------------------------------------------------------------------------------- /eg/fudd.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # based on Google's Elmer Fudd preference setting 4 | 5 | use HTTP::Proxy; 6 | use HTTP::Proxy::BodyFilter::tags; 7 | use HTTP::Proxy::BodyFilter::htmltext; 8 | use strict; 9 | 10 | my $proxy = HTTP::Proxy->new(@ARGV); 11 | 12 | $proxy->push_filter( 13 | mime => 'text/html', 14 | response => HTTP::Proxy::BodyFilter::tags->new, 15 | response => HTTP::Proxy::BodyFilter::htmltext->new( 16 | sub { y/r/w/; s/l(?=\w)/w/g } 17 | ) 18 | ); 19 | 20 | $proxy->start; 21 | 22 | -------------------------------------------------------------------------------- /eg/https.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy; 3 | use HTTP::Proxy::HeaderFilter::simple; 4 | use HTTP::Proxy::BodyFilter::htmlparser; 5 | use HTTP::Proxy::BodyFilter::htmltext; 6 | use HTML::Parser; 7 | use strict; 8 | 9 | # where to find URI in tag attributes 10 | # (it actually a little more complicated, since some tags can have 11 | # several attributes that require an URI) 12 | my %links = ( 13 | a => 'href', 14 | area => 'href', 15 | base => 'href', 16 | link => 'href', 17 | frame => 'src', 18 | iframe => 'src', 19 | img => 'src', 20 | input => 'src', 21 | script => 'src', 22 | form => 'action', 23 | body => 'background', 24 | ); 25 | my $re_tags = join '|', sort keys %links; 26 | 27 | my $hrefparser = HTML::Parser->new( api_version => 3 ); 28 | 29 | # turn all https:// links to http://this_is_ssl links 30 | $hrefparser->handler( 31 | start => sub { 32 | my ( $self, $tag, $attr, $attrseq, $text ) = @_; 33 | if ( $tag =~ /^($re_tags)$/o 34 | && exists $attr->{$links{$1}} 35 | && substr( $attr->{$links{$1}}, 0, 8 ) eq "https://" ) 36 | { 37 | $attr->{$links{$1}} =~ s!^https://!http://this_is_ssl.!; 38 | $text = "<$tag " 39 | . join( ' ', map { qq($_="$attr->{$_}") } @$attrseq ) . ">"; 40 | } 41 | $self->{output} .= $text; 42 | }, 43 | "self,tagname,attr,attrseq,text" 44 | ); 45 | 46 | # by default copy everything 47 | $hrefparser->handler( 48 | default => sub { 49 | my ( $self, $text ) = @_; 50 | $self->{output} .= $text; 51 | }, 52 | "self,text" 53 | ); 54 | 55 | # the proxy itself 56 | my $proxy = HTTP::Proxy->new(@ARGV); 57 | 58 | $proxy->push_filter( 59 | mime => 'text/html', 60 | response => 61 | HTTP::Proxy::BodyFilter::htmlparser->new( $hrefparser, rw => 1 ), 62 | ); 63 | 64 | # detect https requests 65 | $proxy->push_filter( 66 | request => HTTP::Proxy::HeaderFilter::simple->new( 67 | sub { 68 | my ( $self, $headers, $message ) = @_; 69 | 70 | # find out the actual https site 71 | my $uri = $message->uri; 72 | if ( $uri =~ m!^http://this_is_ssl\.! ) { 73 | $uri->scheme("https"); 74 | my $host = $uri->host; 75 | $host =~ s!^this_is_ssl\.!!; 76 | $uri->host($host); 77 | } 78 | } 79 | ), 80 | response => HTTP::Proxy::HeaderFilter::simple->new( 81 | sub { 82 | my ( $self, $headers, $message ) = @_; 83 | 84 | # modify Location: headers in the response 85 | my $location = $headers->header( 'Location' ); 86 | if( $location =~ m!^https://! ) { 87 | $location =~ s!^https://!http://this_is_ssl.!; 88 | $headers->header( Location => $location ); 89 | } 90 | } 91 | ), 92 | ); 93 | 94 | $proxy->start; 95 | -------------------------------------------------------------------------------- /eg/javascript.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy; 3 | use HTML::Parser; 4 | use HTTP::Proxy::BodyFilter::htmlparser; 5 | 6 | # define the filter (the most difficult part) 7 | # filters not using HTML::Parser are much simpler :-) 8 | 9 | my $parser = HTML::Parser->new( api_version => 3 ); 10 | $parser->handler( 11 | start => sub { 12 | my ( $self, $tag, $text ) = @_; 13 | $self->{output} .= $text; 14 | $self->{output} .= "YOUR JAVASCRIPT HERE" if $tag eq 'body'; 15 | }, 16 | "self,tagname,text" 17 | ); 18 | $parser->handler( 19 | default => sub { 20 | my ($self, $text) = @_; 21 | $self->{output} .= $text; 22 | }, 23 | "self,text" 24 | ); 25 | 26 | # this is a read-write filter (rw => 1) 27 | # that is the reason why we had to copy everything into $self->{output} 28 | my $filter = HTTP::Proxy::BodyFilter::htmlparser->new( $parser, rw => 1 ); 29 | 30 | # create and launch the proxy 31 | my $proxy = HTTP::Proxy->new(@ARGV); 32 | $proxy->push_filter( response => $filter, mime => 'text/html' ); 33 | $proxy->start(); 34 | 35 | -------------------------------------------------------------------------------- /eg/js.pl: -------------------------------------------------------------------------------- 1 | use HTTP::Proxy; 2 | use HTTP::Proxy::BodyFilter::save; 3 | 4 | my $proxy = HTTP::Proxy->new(@ARGV); 5 | 6 | # save javascript files as we browse them 7 | $proxy->push_filter( 8 | path => qr!/.js$!, 9 | response => HTTP::Proxy::BodyFilter::save->new( 10 | template => '%f', 11 | prefix => 'javascript', 12 | multiple => 0, 13 | keep_old => 1, 14 | ) 15 | ); 16 | 17 | $proxy->start; 18 | 19 | -------------------------------------------------------------------------------- /eg/leet.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy qw( :log ); 3 | use HTTP::Proxy::BodyFilter::tags; 4 | use HTTP::Proxy::BodyFilter::htmltext; 5 | use strict; 6 | 7 | # a very simple proxy 8 | my $proxy = HTTP::Proxy->new( @ARGV ); 9 | 10 | my %leet = ( 11 | a => [qw( 4 /-\ @ )], 12 | b => ['|3'], 13 | c => [qw! c ( < [ !], 14 | e => [qw( e 3 )], 15 | g => [qw( g 6 )], 16 | h => [qw! h |-| )-( !], 17 | k => [qw( k |< ]{ )], 18 | i => ['i', '!'], 19 | l => [ 'l', "1", "|" ], 20 | m => [ 'm', "|V|", "|\\/|" ], 21 | n => ["|\\|"], 22 | o => ['o', "0"], 23 | s => [ "5", "Z" ], 24 | t => [ "7", "+" ], 25 | u => [qw( u \_/ )], 26 | v => [qw( v \/ )], 27 | w => [qw( vv `// )], 28 | 'y' => ['j', '`/'], 29 | z => ["2"], 30 | ); 31 | 32 | # but a complicated filter 33 | $proxy->push_filter( 34 | mime => 'text/html', 35 | response => HTTP::Proxy::BodyFilter::tags->new, 36 | response => HTTP::Proxy::BodyFilter::htmltext->new( 37 | sub { 38 | s/([a-zA-Z])/$leet{lc $1}[rand @{$leet{lc $1}}]||$1/ge; 39 | } 40 | ) 41 | ); 42 | 43 | $proxy->start; 44 | 45 | -------------------------------------------------------------------------------- /eg/logger.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use strict; 3 | use HTTP::Proxy; 4 | use HTTP::Proxy::HeaderFilter::simple; 5 | use HTTP::Proxy::BodyFilter::simple; 6 | use CGI::Util qw( unescape ); 7 | 8 | # get the command-line parameters 9 | my %args = ( 10 | peek => [], 11 | header => [], 12 | mime => 'text/*', 13 | ); 14 | { 15 | my $args = '(' . join( '|', keys %args ) . ')'; 16 | for ( my $i = 0 ; $i < @ARGV ; $i += 2 ) { 17 | if ( $ARGV[$i] =~ /$args/o ) { 18 | if ( ref $args{$1} ) { 19 | push @{ $args{$1} }, $ARGV[ $i + 1 ]; 20 | } 21 | else { 22 | $args{$1} = $ARGV[ $i + 1 ]; 23 | } 24 | splice( @ARGV, $i, 2 ); 25 | redo if $i < @ARGV; 26 | } 27 | } 28 | } 29 | 30 | # the headers we want to see 31 | my @srv_hdr = ( 32 | qw( Content-Type Set-Cookie Set-Cookie2 WWW-Authenticate Location ), 33 | @{ $args{header} } 34 | ); 35 | my @clt_hdr = 36 | ( qw( Cookie Cookie2 Referer Referrer Authorization ), @{ $args{header} } ); 37 | 38 | # NOTE: Body request filters always receive the request body in one pass 39 | my $post_filter = HTTP::Proxy::BodyFilter::simple->new( 40 | begin => sub { $_[0]->{binary} = 0; }, 41 | filter => sub { 42 | my ( $self, $dataref, $message, $protocol, $buffer ) = @_; 43 | print STDOUT "\n", $message->method, " ", $message->uri, "\n"; 44 | print_headers( $message, @clt_hdr ); 45 | 46 | if ( $self->{binary} || $$dataref =~ /\0/ ) { 47 | $self->{binary} = 1; 48 | print STDOUT " (not printing binary data)\n"; 49 | return; 50 | } 51 | 52 | # this is from CGI.pm, method parse_params() 53 | my (@pairs) = split( /[&;]/, $$dataref ); 54 | for (@pairs) { 55 | my ( $param, $value ) = split( '=', $_, 2 ); 56 | $param = unescape($param); 57 | $value = unescape($value); 58 | printf STDOUT " %-20s => %s\n", $param, $value; 59 | } 60 | } 61 | ); 62 | 63 | my $get_filter = HTTP::Proxy::HeaderFilter::simple->new( 64 | sub { 65 | my ( $self, $headers, $message ) = @_; 66 | my $req = $message->request; 67 | if ( $req->method ne 'POST' ) { 68 | print STDOUT "\n", $req->method, " ", $req->uri, "\n"; 69 | print_headers( $req, @clt_hdr ); 70 | } 71 | print STDOUT $message->status_line, "\n"; 72 | print_headers( $message, @srv_hdr ); 73 | } 74 | ); 75 | 76 | sub print_headers { 77 | my $message = shift; 78 | for my $h (@_) { 79 | if ( $message->header($h) ) { 80 | print STDOUT " $h: $_\n" for ( $message->header($h) ); 81 | } 82 | } 83 | } 84 | 85 | # create and start the proxy 86 | my $proxy = HTTP::Proxy->new(@ARGV); 87 | 88 | # if we want to look at SOME sites 89 | if (@{$args{peek}}) { 90 | for (@{$args{peek}}) { 91 | $proxy->push_filter( 92 | host => $_, 93 | method => 'POST', 94 | request => $post_filter 95 | ); 96 | $proxy->push_filter( 97 | host => $_, 98 | response => $get_filter, 99 | mime => $args{mime}, 100 | ); 101 | } 102 | } 103 | # otherwise, peek at all sites 104 | else { 105 | $proxy->push_filter( 106 | method => 'POST', 107 | request => $post_filter 108 | ); 109 | $proxy->push_filter( response => $get_filter, mime => $args{mime} ); 110 | } 111 | 112 | $proxy->start; 113 | 114 | -------------------------------------------------------------------------------- /eg/outline.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy qw( :log ); 3 | use HTTP::Proxy::BodyFilter::htmlparser; 4 | use HTML::Parser; 5 | use strict; 6 | 7 | my $parser = HTML::Parser->new( api_version => 3 ); 8 | $parser->handler( 9 | start_document => sub { my $self = shift; $self->{print} = 1 }, 10 | "self" 11 | ); 12 | $parser->handler( 13 | start => sub { 14 | my ( $self, $tag, $text ) = @_; 15 | $self->{print} = 1 if $tag =~ /^h\d/; 16 | $self->{output} .= $text if $self->{print}; 17 | $self->{print} = 0 if $tag eq 'body'; 18 | }, 19 | "self,tagname,text" 20 | ); 21 | $parser->handler( 22 | end => sub { 23 | my ( $self, $tag, $text ) = @_; 24 | $self->{print} = 1 if $tag eq 'body'; 25 | $self->{output} .= $text if $self->{print}; 26 | $self->{print} = 0 if $tag =~ /^h\d/; 27 | }, 28 | "self,tagname,text" 29 | ); 30 | $parser->handler( 31 | default => sub { 32 | my ( $self, $text ) = @_; 33 | $self->{output} .= $text if $self->{print}; 34 | }, 35 | "self,text" 36 | ); 37 | 38 | my $filter = HTTP::Proxy::BodyFilter::htmlparser->new( $parser, rw => 1 ); 39 | 40 | my $proxy = HTTP::Proxy->new(@ARGV); 41 | $proxy->push_filter( mime => 'text/html', response => $filter ); 42 | $proxy->start; 43 | 44 | -------------------------------------------------------------------------------- /eg/pdf.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # 3 | # Saves all PDF files, and just confirm saving to the client 4 | # (the PDF file never arrives to the client, but is replaced by 5 | # a simple HTML file) 6 | # 7 | # Based on a request by Emmanuel Di Prétoro 8 | # 9 | use strict; 10 | use warnings; 11 | use HTTP::Proxy qw ( :log ); 12 | use HTTP::Proxy::BodyFilter::save; 13 | use HTTP::Proxy::BodyFilter::simple; 14 | use HTTP::Proxy::HeaderFilter::simple; 15 | 16 | my $proxy = HTTP::Proxy->new( @ARGV ); 17 | 18 | my $saved; 19 | $proxy->push_filter( 20 | # you should probably restrict this to certain hosts as well 21 | path => qr/\.pdf$/, 22 | mime => 'application/pdf', 23 | # save the PDF 24 | response => HTTP::Proxy::BodyFilter::save->new( 25 | template => "%f", 26 | prefix => 'pdf' 27 | ), 28 | # send a HTML message instead 29 | response => HTTP::Proxy::BodyFilter::simple->new( 30 | begin => sub { 31 | my ( $self, $message ) = @_; # for information, saorge 32 | $saved = 0; 33 | }, 34 | filter => sub { 35 | my ( $self, $dataref, $message, $protocol, $buffer ) = @_; 36 | $$dataref = $saved++ ? "" 37 | : sprintf '

Saving PDF file. Go back

', 38 | $message->request->header('referer'); 39 | } 40 | ), 41 | # change the response Content-Type 42 | response => HTTP::Proxy::HeaderFilter::simple->new( 43 | sub { 44 | my ( $self, $headers, $response ) = @_; 45 | $headers->content_type('text/html'); 46 | } 47 | ), 48 | ); 49 | 50 | $proxy->start; 51 | 52 | -------------------------------------------------------------------------------- /eg/perlmonks.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy qw( :log ); 3 | use HTTP::Proxy::HeaderFilter::simple; 4 | use strict; 5 | 6 | # a very simple proxy 7 | my $proxy = HTTP::Proxy->new(@ARGV); 8 | 9 | # this filter redirects all requests to perlmonks.org 10 | my $filter = HTTP::Proxy::HeaderFilter::simple->new( 11 | sub { 12 | my ( $self, $headers, $message ) = @_; 13 | 14 | # modify the host part of the request 15 | $self->proxy()->log( ERROR, "FOO", $message->uri() ); 16 | $message->uri()->host('perlmonks.org'); 17 | 18 | # create a new redirect response 19 | my $res = HTTP::Response->new( 20 | 301, 21 | 'Moved to perlmonks.org', 22 | [ Location => $message->uri() ] 23 | ); 24 | 25 | # and make the proxy send it back to the client 26 | $self->proxy()->response($res); 27 | } 28 | ); 29 | 30 | # put this filter on perlmonks.com and www.perlmonks.org 31 | $proxy->push_filter( host => 'perlmonks.com', request => $filter ); 32 | $proxy->push_filter( host => 'www.perlmonks.org', request => $filter ); 33 | 34 | $proxy->start(); 35 | -------------------------------------------------------------------------------- /eg/post.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use strict; 3 | use HTTP::Proxy qw( :log ); 4 | use HTTP::Proxy::BodyFilter::simple; 5 | use CGI::Util qw( unescape ); 6 | 7 | # NOTE: Body request filters always receive the request body in one pass 8 | my $filter = HTTP::Proxy::BodyFilter::simple->new( 9 | sub { 10 | my ( $self, $dataref, $message, $protocol, $buffer ) = @_; 11 | print STDOUT $message->method, " ", $message->uri, "\n"; 12 | 13 | # this is from CGI.pm, method parse_params 14 | my (@pairs) = split ( /[&;]/, $$dataref ); 15 | for (@pairs) { 16 | my ( $param, $value ) = split ( '=', $_, 2 ); 17 | $param = unescape($param); 18 | $value = unescape($value); 19 | printf STDOUT " %-30s => %s\n", $param, $value; 20 | } 21 | } 22 | ); 23 | 24 | my $proxy = HTTP::Proxy->new(@ARGV); 25 | $proxy->push_filter( method => 'POST', request => $filter ); 26 | $proxy->start; 27 | 28 | -------------------------------------------------------------------------------- /eg/proxy-auth.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy qw( :log ); 3 | use HTTP::Proxy::HeaderFilter::simple; 4 | use MIME::Base64 qw( encode_base64 ); 5 | use strict; 6 | 7 | # the encoded user:password pair 8 | # login: http 9 | # passwd: proxy 10 | my $token = "Basic " . encode_base64( "http:proxy", '' ); 11 | 12 | # a very simple proxy that requires authentication 13 | my $proxy = HTTP::Proxy->new(@ARGV); 14 | 15 | # the authentication filter 16 | $proxy->push_filter( 17 | request => HTTP::Proxy::HeaderFilter::simple->new( 18 | sub { 19 | my ( $self, $headers, $request ) = @_; 20 | 21 | # check the token against all credentials 22 | my $ok = 0; 23 | $_ eq $token && $ok++ 24 | for $self->proxy->hop_headers->header('Proxy-Authorization'); 25 | 26 | # no valid credential 27 | if ( !$ok ) { 28 | my $response = HTTP::Response->new(407); 29 | $response->header( 30 | Proxy_Authenticate => 'Basic realm="HTTP::Proxy"' ); 31 | $self->proxy->response($response); 32 | } 33 | } 34 | ) 35 | ); 36 | 37 | $proxy->start; 38 | 39 | -------------------------------------------------------------------------------- /eg/proxy.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy qw( :log ); 3 | use strict; 4 | 5 | # a very simple proxy 6 | my $proxy = HTTP::Proxy->new(@ARGV); 7 | $proxy->start; 8 | -------------------------------------------------------------------------------- /eg/rainbow.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy qw( :log ); 3 | use HTTP::Proxy::BodyFilter::tags; 4 | use HTTP::Proxy::BodyFilter::simple; 5 | use HTTP::Proxy::BodyFilter::htmltext; 6 | use strict; 7 | 8 | my $proxy = HTTP::Proxy->new(@ARGV); 9 | 10 | $proxy->push_filter( 11 | mime => 'text/html', 12 | response => HTTP::Proxy::BodyFilter::tags->new, # protect tags 13 | response => HTTP::Proxy::BodyFilter::simple->new( # rainbow entities 14 | sub { ${ $_[1] } =~ s/(&[#\w]+;)/rainbow($1)/eg; } 15 | ), 16 | response => HTTP::Proxy::BodyFilter::htmltext->new( # rainbow text 17 | sub { s/(\S)/rainbow($1)/eg; } 18 | ) 19 | ); 20 | 21 | sub rainbow { 22 | return sprintf qq{%s}, next_color(), shift; 23 | } 24 | 25 | # the following code courtesy David 'grinder' Landgren 26 | # but adapted for our needs 27 | use constant PI_2 => 3.14159265359 * 2; 28 | my @PRIMES = qw/11 13 17 19 23 29 31 37 41 43 47 53 59/; 29 | my $red = rand() * PI_2; 30 | my $green = rand() * PI_2; 31 | my $blue = rand() * PI_2; 32 | my $rdelta = PI_2 / $PRIMES[ rand scalar @PRIMES ]; 33 | my $gdelta = PI_2 / $PRIMES[ rand scalar @PRIMES ]; 34 | my $bdelta = PI_2 / $PRIMES[ rand scalar @PRIMES ]; 35 | my ( $rp, $gp, $bp ) = ( sin $red, sin $green, sin $blue ); 36 | my ( $rq, $gq, $bq ) = qw/ 0 0 0/; 37 | my ( $rr, $gr, $br ) = qw/ 0 0 0/; 38 | 39 | $proxy->start; 40 | 41 | sub next_color { 42 | my $rs = sin( $red += $rdelta ); 43 | my $rc = $rs * 120 + 120; 44 | my $gs = sin( $green += $gdelta ); 45 | my $gc = $gs * 120 + 120; 46 | my $bs = sin( $blue += $bdelta ); 47 | my $bc = $bs * 120 + 120; 48 | 49 | $rq = $rp <=> $rs; 50 | $gq = $gp <=> $gs; 51 | $bq = $bp <=> $bs; 52 | 53 | $rp = $rs; 54 | $gp = $gs; 55 | $bp = $bs; 56 | 57 | $rdelta = PI_2 / $PRIMES[ rand scalar @PRIMES ] 58 | if ( $rr == 1 and $rq < 1 and $rs < 1 ); 59 | $gdelta = PI_2 / $PRIMES[ rand scalar @PRIMES ] 60 | if ( $gr == 1 and $gq < 1 and $gs < 1 ); 61 | $bdelta = PI_2 / $PRIMES[ rand scalar @PRIMES ] 62 | if ( $br == 1 and $bq < 1 and $bs < 1 ); 63 | 64 | $rr = $rq; 65 | $gr = $gq; 66 | $br = $bq; 67 | 68 | $rc = ( $rc < 0 ) ? 0 : ( $rc > 255 ) ? 255 : $rc; 69 | $gc = ( $gc < 0 ) ? 0 : ( $gc > 255 ) ? 255 : $gc; 70 | $bc = ( $bc < 0 ) ? 0 : ( $bc > 255 ) ? 255 : $bc; 71 | 72 | return sprintf( "#%02x%02x%02x", $rc, $gc, $bc ); 73 | } 74 | 75 | -------------------------------------------------------------------------------- /eg/rfc.pl: -------------------------------------------------------------------------------- 1 | use HTTP::Proxy; 2 | use HTTP::Proxy::BodyFilter::save; 3 | 4 | my $proxy = HTTP::Proxy->new(@ARGV); 5 | 6 | # save RFC files as we browse them 7 | $proxy->push_filter( 8 | path => qr!/rfc\d+.txt!, 9 | mime => 'text/plain', 10 | response => HTTP::Proxy::BodyFilter::save->new( 11 | template => '%f', 12 | prefix => 'rfc', 13 | multiple => 0, 14 | keep_old => 1, 15 | ) 16 | ); 17 | 18 | $proxy->start; 19 | 20 | -------------------------------------------------------------------------------- /eg/rot13.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/book/HTTP-Proxy/7874e910b9d5ffaf55afc106a3e080149879f50b/eg/rot13.pl -------------------------------------------------------------------------------- /eg/switch.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy; 3 | use HTTP::Proxy::HeaderFilter::simple; 4 | 5 | # call this proxy as 6 | # eg/switch.pl proxy http://proxy1:port/,http://proxy2:port/ 7 | my %args = @ARGV; 8 | my @proxy = split/,/, $args{proxy}; 9 | my $proxy = HTTP::Proxy->new(@ARGV); 10 | 11 | $proxy->push_filter( 12 | request => HTTP::Proxy::HeaderFilter::simple->new( 13 | sub { 14 | shift->proxy->agent->proxy( http => $proxy[ rand @proxy ] ); 15 | } 16 | ) 17 | ); 18 | 19 | $proxy->start; 20 | -------------------------------------------------------------------------------- /eg/tracker.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy; 3 | use HTTP::Proxy::HeaderFilter::simple; 4 | use Fcntl ':flock'; 5 | use strict; 6 | 7 | # this is a tracker proxy that stores Referer, URL, CODE 8 | # and output them to STDOUT or the given file 9 | # 10 | # Example output: 11 | # 12 | # NULL http://www.perl.org/ 200 13 | # http://www.perl.org/ http://learn.perl.org/ 200 14 | # 15 | my $file = shift || '-'; 16 | open OUT, ">> $file" or die "Can't open $file: $!"; 17 | 18 | my $proxy = HTTP::Proxy->new( @ARGV ); # pass the args you want 19 | $proxy->push_filter( 20 | response => HTTP::Proxy::HeaderFilter::simple->new( 21 | sub { 22 | my ( $self, $headers, $message ) = @_; 23 | 24 | flock( OUT, LOCK_EX ); 25 | print OUT join( " ", 26 | $message->request->headers->header( 'Referer' ) || 'NULL', 27 | $message->request->uri, 28 | $message->code ), $/; 29 | flock( OUT, LOCK_UN ); 30 | } 31 | ) 32 | ); 33 | $proxy->start; 34 | 35 | END { close OUT; } 36 | 37 | -------------------------------------------------------------------------------- /eg/trim.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use HTTP::Proxy qw( :log ); 3 | use HTTP::Proxy::BodyFilter::lines; 4 | use HTTP::Proxy::BodyFilter::simple; 5 | use strict; 6 | 7 | my $proxy = HTTP::Proxy->new(@ARGV); 8 | 9 | # a simple proxy that trims whitespace in HTML 10 | $proxy->push_filter( 11 | mime => 'text/html', 12 | response => HTTP::Proxy::BodyFilter::lines->new(), 13 | response => HTTP::Proxy::BodyFilter::simple->new( 14 | sub { 15 | my ($self, $dataref ) = @_; 16 | $$dataref =~ s/^\s+//m; # multi-line data 17 | $$dataref =~ s/\s+$//m; 18 | } 19 | ) 20 | ); 21 | 22 | $proxy->start; 23 | -------------------------------------------------------------------------------- /eg/yahoogroups.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use strict; 3 | use HTTP::Proxy qw( :log ); 4 | use HTTP::Proxy::HeaderFilter::simple; 5 | use CGI::Util qw( unescape ); 6 | 7 | my $proxy = HTTP::Proxy->new(@ARGV); 8 | 9 | $proxy->push_filter( 10 | host => 'groups.yahoo.com', 11 | response => HTTP::Proxy::HeaderFilter::simple->new( 12 | sub { 13 | my ( $self, $headers, $message ) = @_; 14 | my $location; 15 | 16 | # ads start by redirecting to 'interrupt' 17 | return 18 | unless ( $location = $headers->header('Location') ) 19 | && $location =~ m!/interrupt\?!; 20 | 21 | # fetch the ad page (we need the cookie) 22 | # use a new request to avoid modifying the original one 23 | $self->proxy->log( FILTERS, "YAHOOGROUPS", 24 | "Ad interrupt detected: fetching $location" ); 25 | my $r = $self->proxy->agent->simple_request( 26 | HTTP::Request->new( 27 | GET => $location, 28 | $message->request->headers # headers are cloned 29 | ) 30 | ); 31 | 32 | # redirect to our original destination 33 | # which was stored in the 'done' parameter 34 | # and pass the cookie along 35 | $location = unescape($location); 36 | $location =~ s|^(http://[^/]*).*done=([^&]*).*$|$1$2|; 37 | $headers->header( Location => $location ); 38 | $headers->header( Set_Cookie => $r->header('Set_Cookie') ); 39 | $self->proxy->log( FILTERS, "YAHOOGROUPS", 40 | "Set-Cookie: " . $r->header('Set_Cookie') ); 41 | } 42 | ) 43 | ); 44 | 45 | $proxy->start; 46 | 47 | -------------------------------------------------------------------------------- /html/.htaccess: -------------------------------------------------------------------------------- 1 | Options +Indexes 2 | IndexOptions FancyIndexing 3 | IndexOptions +NameWidth=* 4 | IndexOptions +DescriptionWidth=* 5 | 6 | AddDescription "SVN Repository snapshot" http-proxy-HEAD.tar.gz 7 | AddDescription "Tests fail, t/Utils.pm is missing" HTTP-Proxy-0.06.tar.gz 8 | AddDescription "New filter API" HTTP-Proxy-0.10.tar.gz 9 | AddDescription "CONNECT support" HTTP-Proxy-0.13.tar.gz 10 | AddDescription "Bug in Makefile.PL :-(" HTTP-Proxy-0.15.tar.gz 11 | AddDescription "HTTP::Proxy::Engine introduced" HTTP-Proxy-0.16.tar.gz 12 | AddDescription "Support for relaying CONNECT" HTTP-Proxy-0.23.tar.gz 13 | -------------------------------------------------------------------------------- /html/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/book/HTTP-Proxy/7874e910b9d5ffaf55afc106a3e080149879f50b/html/favicon.ico -------------------------------------------------------------------------------- /html/http-proxy.css: -------------------------------------------------------------------------------- 1 | BODY, .normal, .menu { 2 | font-family: Verdana, Arial, Helvetica, Geneva, sans-serif; 3 | background: white; 4 | color: black 5 | } 6 | CODE, PRE { 7 | font-family: "Lucida Console", monospace 8 | } 9 | PRE {font-family: "Lucida Console", monospace} 10 | HR {color: #bbbbbb} 11 | H1, H2, H3, H4, H5, H6 {font-family: Verdana, Arial, Helvetica, Geneva, sans-serif} 12 | 13 | A:link { color: blue } 14 | A:visited { color: purple } 15 | A:active { color: red } 16 | 17 | .menu {font-size: 75%} 18 | 19 | .new {color: #ee3333; font-variant: small-caps; font-weight: bold} 20 | .upd {color: #33cc66; font-variant: small-caps; font-weight: bold} 21 | 22 | -------------------------------------------------------------------------------- /html/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /icons/ 3 | -------------------------------------------------------------------------------- /html/talks/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/book/HTTP-Proxy/7874e910b9d5ffaf55afc106a3e080149879f50b/html/talks/index.html -------------------------------------------------------------------------------- /html/talks/ye2004/html-syntax.css: -------------------------------------------------------------------------------- 1 | /* ====================================================================== * 2 | * Sample stylesheet for Syntax::Highlight::HTML * 3 | * * 4 | * Copyright (C)2004 Sebastien Aperghis-Tramoni, All Rights Reserved. * 5 | * * 6 | * This file is free software; you can redistribute it and/or modify * 7 | * it under the same terms as Perl itself. * 8 | * ====================================================================== */ 9 | 10 | 11 | .h-decl { color: #336699; font-style: italic; } /* doctype declaration */ 12 | .h-pi { color: #336699; } /* process instruction */ 13 | .h-com { color: #338833; font-style: italic; } /* comment */ 14 | .h-ab { color: #000000; font-weight: bold; } /* angles as tag delim. */ 15 | .h-tag { color: #993399; font-weight: bold; } /* tag name */ 16 | .h-attr { color: #000000; font-weight: bold; } /* attribute name */ 17 | .h-attv { color: #333399; } /* attribute value */ 18 | .h-ent { color: #cc3333; } /* entity */ 19 | 20 | .h-lno { color: #cccccc; background: #eee; } /* line numbers */ 21 | -------------------------------------------------------------------------------- /html/talks/ye2004/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/book/HTTP-Proxy/7874e910b9d5ffaf55afc106a3e080149879f50b/html/talks/ye2004/index.html -------------------------------------------------------------------------------- /html/talks/ye2004/perltidy.css: -------------------------------------------------------------------------------- 1 | /* 2 | * perltidy's styles 3 | */ 4 | .c { color: #228B22; } /* comment */ 5 | .cm { color: #000000; } /* comma */ 6 | .co { color: #000000; } /* colon */ 7 | .h { color: #CD5555; font-weight:bold; } /* here-doc-target */ 8 | .hh { color: #CD5555; font-style:italic; } /* here-doc-text */ 9 | .i { color: #00688B; } /* identifier */ 10 | .j { color: #000000; font-weight:bold; } /* label */ 11 | .k { color: #8B4513; font-weight:bold; } /* keyword */ 12 | .m { color: #FF0000; font-weight:bold; } /* subroutine */ 13 | .n { color: #B452CD; } /* numeric */ 14 | .p { color: #000000; } /* paren */ 15 | .pd { color: #228B22; font-style:italic; } /* pod-text */ 16 | .pu { color: #000000; } /* punctuation */ 17 | .q { color: #CD5555; } /* quote */ 18 | .s { color: #000000; } /* structure */ 19 | .sc { color: #000000; } /* semicolon */ 20 | .v { color: #B452CD; } /* v-string */ 21 | .w { color: #000000; } /* bareword */ 22 | 23 | -------------------------------------------------------------------------------- /html/talks/ye2004/podpoint.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/book/HTTP-Proxy/7874e910b9d5ffaf55afc106a3e080149879f50b/html/talks/ye2004/podpoint.css -------------------------------------------------------------------------------- /html/talks/ye2004/slide001.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | What is HTTP::Proxy? 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 1/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

What is HTTP::Proxy?

30 | 31 |

HTTP::Proxy is a web proxy written in pure Perl.

32 |

And quite simple to use:

33 |
    #!/usr/bin/perl -w
34 |     use HTTP::Proxy;
35 |     use strict;
36 |     
37 |     # a very simple proxy
38 |     HTTP::Proxy->new->start;
39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide002.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | There are other web proxies in Perl 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 2/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

There are other web proxies in Perl

30 | 31 |

Oh yes. And written by gurus.

32 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide003.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Why not use Squid? 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 3/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Why not use Squid?

30 | 31 |
    32 |
  • 33 |

    Let me make this clear for you all: Squid is meant for CACHING.

    34 |
  • 35 |
  • 36 |

    Mmm, high-performance caching.

    37 |
  • 38 |
  • 39 |

    HTTP::Proxy gives you total control over your web experience.

    40 |
  • 41 |
  • 42 |

    If you want speed, cache and some kind of filtering, you can leave the 43 | room and continue to use Squid.

    44 |
  • 45 |
  • 46 |

    If you want TOTAL CONTROL of the web, you can stay a little longer. 47 | ;-)

    48 |
  • 49 |
50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide004.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | How does it work? 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 4/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

How does it work?

30 | 31 |
    32 |
  • 33 |

    A proxy is a program that is both a HTTP client and a HTTP server:

    34 |
      35 |
    • 36 |

      the proxy server talks to your browser

      37 |
    • 38 |
    • 39 |

      the proxy client talks to the origin server

      40 |
    • 41 |
    • 42 |

      the proxy moves the data back and forth between them

      43 |
    • 44 |
    45 |
  • 46 |
  • 47 |

    The proxy is on the path of all data going back and forth, so:

    48 |
      49 |
    • 50 |

      it can look at everything going through

      51 |
    • 52 |
    • 53 |

      it can modify everything going through

      54 |
    • 55 |
    56 |
  • 57 |
58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide005.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | HTTP Connections (1) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 5/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

HTTP Connections (1)

30 | 31 |

To understand how HTTP::Proxy works, you need to know 32 | what happens during a HTTP connection.

33 |

A HTTP connection is divided in two messages:

34 |
    35 |
  • 36 |

    Request

    37 |
        GET / HTTP/1.1
    38 | 39 |
  • 40 |
  • 41 |

    Response

    42 |
        HTTP/1.1 200 OK
    43 | 44 |
  • 45 |
46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide006.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | HTTP Connections (2) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 6/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

HTTP Connections (2)

30 | 31 |

Each message is also divided in two:

32 |
    33 |
  • 34 |

    Headers

    35 |
        Connection: close
    36 |     Date: Tue, 03 Aug 2004 09:07:02 GMT
    37 |     Accept-Ranges: bytes
    38 |     ETag: "3f80f-1b6-3e1cb03b"
    39 |     Server: Apache/1.3.27 (Unix)  (Red-Hat/Linux)
    40 |     Content-Length: 438
    41 |     Content-Type: text/html
    42 |     ...
    43 | 44 |
  • 45 |
  • 46 |

    Body

    47 |
    48 |     <HTML>
    49 |     <HEAD>
    50 |       <TITLE>Example Web Page</TITLE>
    51 |     </HEAD>
    52 |     <body>
    53 |     <p>You have reached this web page by typing &quot;example.com&quot;,
    54 |     &quot;example.net&quot;,
    55 |     ...
    56 |
  • 57 |
58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide007.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | HTTP proxy and HTTP connections 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 7/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

HTTP proxy and HTTP connections

30 | 31 |
    32 |
  • 33 |

    A HTTP connection is divided in four parts: request-headers, request-body, 34 | response-headers and response-body

    35 |
  • 36 |
  • 37 |

    HTTP::Proxy can therefore look at/modify the data in four different 38 | places and in two different ways (headers/body)

    39 |
  • 40 |
  • 41 |

    You know Unix, so you know the power of:

    42 |
        grep | cut | sort | nl
    43 | 44 |
  • 45 |
  • 46 |

    HTTP::Proxy also uses filters that modify a flow of data (for the message 47 | bodies). They also can be stacked, which brings you the power to 48 | forever alter the face of the web (!!!)

    49 |
  • 50 |
51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide008.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Filters (1) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 8/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Filters (1)

30 | 31 |
    32 |
  • 33 |

    Before version 0.10, filters were simple callbacks: code references 34 | added by hand

    35 |
  • 36 |
  • 37 |

    Version 0.10 brought HTTP::Proxy::HeaderFilter and HTTP::Proxy::BodyFilter

    38 |
      39 |
    • 40 |

      they are object classes

      41 |
    • 42 |
    • 43 |

      they define an API

      44 |
    • 45 |
    • 46 |

      the class determines on which part of the message they are applied

      47 |
    • 48 |
    • 49 |

      every filter used by HTTP::Proxy must be a subclass of one of those

      50 |
    • 51 |
    • 52 |

      "standard" filters are all lowercase

      53 |
    • 54 |
    55 |
  • 56 |
  • 57 |

    HTTP::Proxy::HeaderFilter::standard is used for all standard headers 58 | (removing hop-by-hop headers, adding Via: headers, etc.)

    59 |
  • 60 |
61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide009.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Filters (2) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 9/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Filters (2)

30 | 31 |
    32 |
  • 33 |

    Filters should adhere to the Unix philosophy: simple tools that do 34 | one simple thing and do it well

    35 |
  • 36 |
  • 37 |

    Each header filter:

    38 |
      39 |
    • 40 |

      is only called once, with a reference to the HTTP::Headers object

      41 |
    • 42 |
    • 43 |

      can modify the message headers

      44 |
    • 45 |
    46 |
  • 47 |
  • 48 |

    Each body filter:

    49 |
      50 |
    • 51 |

      is called each time a chunk of data is received

      52 |
    • 53 |
    • 54 |

      can modify the data on the fly

      55 |
    • 56 |
    57 |
  • 58 |
  • 59 |

    The proxy passes the possibly modified chunk on to the next filter 60 | in the stack

    61 |
  • 62 |
  • 63 |

    In the end, the proxy sends the modified chunk to the client

    64 |
  • 65 |
  • 66 |

    But not all filters are applied on all connections!

    67 |
  • 68 |
69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide010.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Filter selection (1) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 10/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Filter selection (1)

30 | 31 |
    32 |
  • 33 |

    Filters are activated according to certain criteria:

    34 |
      35 |
    • 36 |

      Request

      37 |
        38 |
      • 39 |

        method

        40 |
      • 41 |
      • 42 |

        scheme

        43 |
      • 44 |
      • 45 |

        host

        46 |
      • 47 |
      • 48 |

        path

        49 |
      • 50 |
      • 51 |

        query-string

        52 |
      • 53 |
      54 |
    • 55 |
    • 56 |

      Response

      57 |
        58 |
      • 59 |

        All of the above

        60 |
      • 61 |
      • 62 |

        MIME type (text/*, etc.)

        63 |
      • 64 |
      65 |
    • 66 |
    67 |
  • 68 |
  • 69 |

    The push_filter() method is using these criteria to create a 70 | "match subroutine". A filter stack is actually a stack of 71 | [ $match_sub, $filter ] pairs.

    72 |
  • 73 |
  • 74 |

    The match subroutine is used to create the actual list of filters 75 | that will act on the HTTP messages.

    76 |
  • 77 |
78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide011.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Filter selection (2) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 11/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Filter selection (2)

30 | 31 |
    32 |
  • 33 |

    A simple example:

    34 |
        # $filter is a filter object
    35 |     $proxy->push_filter(
    36 |         response => $filter,       # only responses
    37 |         host     => 'example.com', # coming from example.com
    38 |         mime     => 'text/html',   # as HTML file
    39 |     );
    40 |
  • 41 |
  • 42 |

    Naturaly, the ordering of filters is important, since each filter 43 | receives data chunks from its predecessor, possibly modifies them 44 | and passes them on to the next filter in the chain.

    45 |
        # first, require authentication for everything
    46 |     $proxy->push_filter(
    47 |          request => $proxy_auth,  # 407 unless user is authenticated
    48 |     );
    49 |     # then modify example.com
    50 |     $proxy->push_filter(
    51 |        response => $filter,       # only responses
    52 |        host     => 'example.com', # coming from example.com
    53 |        mime     => 'text/html',   # as HTML file
    54 |     );
    55 |
  • 56 |
57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide012.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Filter selection (3) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 12/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Filter selection (3)

30 | 31 |
    32 |
  • 33 |

    Filters sharing the same match routine can be pushed within 34 | a single call:

    35 |
        $proxy->push_filter(
    36 |         host     => 'example.com',
    37 |         request  => $req_filter,
    38 |         response => $res_filter,
    39 |         response => $res_filter2,
    40 |     );
    41 |
  • 42 |
  • 43 |

    In this example,

    44 |
      45 |
    • 46 |

      $req_filter is applied on all requests to example.com

      47 |
    • 48 |
    • 49 |

      $res_filter receives the response body data for all 50 | requests to example.com before $res_filter2

      51 |
    • 52 |
    53 |
  • 54 |
  • 55 |

    The filter class is used by the proxy to decide if the filter is a header 56 | filter or a body filter

    57 |
  • 58 |
59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide013.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Filter methods 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 13/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Filter methods

30 | 31 |
    32 |
  • 33 |

    As objects, filters implement a number of methods

    34 |
  • 35 |
  • 36 |

    filter() is the main method, it is called each time a chunk of 37 | data is received

    38 |
  • 39 |
  • 40 |

    Other methods are available:

    41 |
      42 |
    • 43 |

      init() is called once, when the filter is created

      44 |
    • 45 |
    • 46 |

      start() is called before the first chunk of data is received. 47 | It can be used for per message initialisation.

      48 |
    • 49 |
    • 50 |

      end() is called after the last chunk of data has been received. 51 | It can be used for per message cleaning up.

      52 |
    • 53 |
    54 |
  • 55 |
56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide014.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Time to show off! 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 14/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Time to show off!

30 | 31 |
    32 |
  • 33 |

    We're already at slide 14, and I'm afraid you're all asleep

    34 |
  • 35 |
  • 36 |

    It's clearly the time for a DEMO

    37 |
  • 38 |
  • 39 |

    Remember, I told you I could forever alter the face of the web...

    40 |
  • 41 |
  • 42 |

    Well, let's deface the W3C first.

    43 |
  • 44 |
  • 45 |

    The demo shows the following filters: utf-8 to iso-8859-1, rot-13, 46 | leetspeak, AYB, outline, AYB + outline, Swedish Chef, JavaScript...

    47 |
  • 48 |
49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide015.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | HTTP::Proxy::HeaderFilter 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 15/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

HTTP::Proxy::HeaderFilter

30 | 31 |
    32 |
  • 33 |

    This is the base class for manipulating message headers

    34 |
  • 35 |
  • 36 |

    The filter() method has the following signature:

    37 |
        sub filter {
    38 |         my ( $self, $headers, $message ) = @_;
    39 |         ...
    40 |     }
    41 |
  • 42 |
  • 43 |

    The filter can create a response and make the proxy send it directly 44 | by using the proxy response() accessor.

    45 |
        $filter = HTTP::Proxy::HeaderFilter::simple->new(
    46 |         sub {
    47 |             my ( $self, $headers, $message ) = @_;
    48 |             # random error
    49 |             $self->proxy->response(
    50 |                 HTTP::Response->new(
    51 |                     403, 
    52 |                     "<h1>Nothing to see here, move along.</h1>",
    53 |                     [ 'Content-Type' => 'text/html' ]
    54 |                 )
    55 |             ) if rand > .7;
    56 |         }
    57 |     );
    58 |
  • 59 |
  • 60 |

    This completely shortcuts the origin server

    61 |
  • 62 |
63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide016.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | HTTP::Proxy::BodyFilter 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 16/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

HTTP::Proxy::BodyFilter

30 | 31 |
    32 |
  • 33 |

    This is the base class for manipulating message bodies

    34 |
  • 35 |
  • 36 |

    The filter() method has the following signature:

    37 |
        sub filter {
    38 |         my ( $self, $dataref, $message, $protocol, $buffer ) = @_;
    39 |         ...
    40 |     }
    41 |
  • 42 |
  • 43 |

    In the case of response filters, the headers ($message->headers()) 44 | have already been sent to the client, so technically they're read-only.

    45 |
  • 46 |
47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide017.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Simple filters 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 17/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Simple filters

30 | 31 |
    32 |
  • 33 |

    Simple filters are created with HTTP::Proxy::BodyFilter::simple 34 | or HTTP::Proxy::HeaderFilter::simple.

    35 |
  • 36 |
  • 37 |

    These are filter factories that return a filter object given the 38 | filtering routine

    39 |
        $filter = HTTP::Proxy::BodyFilter::simple->new(
    40 |         sub { ${ $_[1] } =~ s/foo/bar/g; }
    41 |     );
    42 |
  • 43 |
  • 44 |

    The routine must have the same signature as the filter() method.

    45 |
  • 46 |
  • 47 |

    One can also use:

    48 |
        $filter = HTTP::Proxy::BodyFilter::simple->new(
    49 |         filter => \&filter_sub,
    50 |         start  => \&start_sub,
    51 |     );
    52 |

    to define the other standard methods.

    53 |
  • 54 |
  • 55 |

    HTTP::Proxy::BodyFilter::simple and HTTP::Proxy::HeaderFilter::simple 56 | are conceptually identical

    57 |
  • 58 |
59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide018.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Simple filters that fail miserably 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 18/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Simple filters that fail miserably

30 | 31 |
    32 |
  • 33 |

    Imagine I hate the <em> tag and only want to see <i> tags

    34 |
  • 35 |
  • 36 |

    The filtering routine is easy to write:

    37 |
        s!<(/?)em\b!<$1i!g;
    38 |
  • 39 |
  • 40 |

    And so is the filter:

    41 |
        $filter = HTTP::Proxy::BodyFilter::simple->new(
    42 |         sub { ${ $_[1] } =~ s!<(/?)em\b!<$1i!g; }
    43 |     );
    44 |
  • 45 |
  • 46 |

    Except that the filter might (and surely will) receive a chunk of data 47 | containing an incomplete tag, like

    48 |
    49 |     ove <b>&lt;em&gt;</b> tags, love, <em>love</e
    50 | 51 |
  • 52 |
  • 53 |

    In this case, your regular expression won't match the closing tag 54 | and the transmogrified HTML will not be well-formed any more.

    55 |
  • 56 |
  • 57 |

    You have to make sure no tag is cut. Maybe you could use HTML::Parser?

    58 |
  • 59 |
60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide019.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Helper filters 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 19/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Helper filters

30 | 31 |
    32 |
  • 33 |

    Several "standard" filters are shipped with HTTP::Proxy.

    34 |
  • 35 |
  • 36 |

    One of them is the HTTP::Proxy::BodyFilter::tags filter, which 37 | makes sure that subsequent filters will not receive an incomplete HTML tag

    38 |
  • 39 |
  • 40 |

    And now our simple filter just works:

    41 |
          $proxy->push_filter(
    42 |           host     => 'example.com',
    43 |           response => HTTP::Proxy::BodyFilter::tags->new(),
    44 |           response => HTTP::Proxy::BodyFilter::simple->new(
    45 |               sub { ${$_[1]} =~ s!<(/?)em\b!<$1i!g; }
    46 |           )
    47 |       );
    48 |
  • 49 |
  • 50 |

    When the HTTP::Proxy::BodyFilter::tags filter finds a < bracket 51 | without the matching >, it prevents the last piece of data to be sent 52 | to the next filter in the chain

    53 |
  • 54 |
  • 55 |

    The missing bit is prepended to the next chunk of data going through the 56 | filters

    57 |
  • 58 |
59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide020.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | The buffering system 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 20/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

The buffering system

30 | 31 |
    32 |
  • 33 |

    HTTP::Proxy's buffering system is what makes the 34 | HTTP::Proxy::BodyFilter::tags filter possible.

    35 |
  • 36 |
  • 37 |

    Each body filter is passed a reference to an empty string where it 38 | can store data that will be prepended to the next chunk of data

    39 |
  • 40 |
  • 41 |

    You can keep stuff in memory for later use (for example that 42 | annoying half-tag)

    43 |
  • 44 |
  • 45 |

    After the last chunk is received, the proxy passes a reference to 46 | "" to the filters, so as to empty all the buffers. The reference 47 | to the buffer is then undef, so the filters know it's the last time 48 | they are called for this message.

    49 |
  • 50 |
51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide021.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Other standard filters 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 21/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Other standard filters

30 | 31 |
    32 |
  • 33 |

    HTTP::Proxy::BodyFilter::lines

    34 |

    Ensures that only complete lines are sent to the next filter. 35 | You can change the line-ending definition, just as with $/.

    36 |

    Note that the next filter does not receives data line by line, but 37 | by groups of complete lines (including the EOL characters).

    38 |
  • 39 |
  • 40 |

    HTTP::Proxy::BodyFilter::htmltext

    41 |

    This filter uses a basic parsing scheme to call a user-supplied routine 42 | for each piece of text in the HTML document. 43 | The routine takes no parameter and the chunk of data is stored in $_.

    44 |
        HTTP::Proxy::BodyFilter::htmltext->new( sub { tr/a-zA-z/n-za-mN-ZA-M/ } );
    45 |

    It must be preceded by HTTP::Proxy::BodyFilter::tags.

    46 |
  • 47 |
  • 48 |

    HTTP::Proxy::BodyFilter::htmlparser

    49 |

    This filter lets you use a HTML::Parser object to read the message body. 50 | Modifying the data is a little more complicated, but possible.

    51 |
  • 52 |
  • 53 |

    HTTP::Proxy::BodyFilter::save

    54 |

    This filter saves the data to a file as it flows through the proxy.

    55 |
  • 56 |
57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide022.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Live example: proxy authentication 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 22/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Live example: proxy authentication

30 | 31 |
    use MIME::Base64 qw( encode_base64 );
32 |     
33 |     # the encoded user:password pair
34 |     my $token = "Basic " . encode_base64( "login:password" );
35 |     chomp $token;    # grr
36 |     
37 |     # the authentication filter
38 |     $proxy->push_filter(
39 |         request => HTTP::Proxy::HeaderFilter::simple->new(
40 |             sub {
41 |                 my ( $self, $headers, $request ) = @_;
42 |                 my $auth = $self->proxy->hop_headers->header('Proxy-Authorization')
43 |                   || "";
44 |     
45 |                 # check the hard-coded credentials
46 |                 if ( $auth ne $token ) {
47 |                     my $response = HTTP::Response->new(407);
48 |                     $response->header(
49 |                         Proxy_Authenticate => 'Basic realm="HTTP::Proxy"' );
50 |                     $self->proxy->response($response);
51 |                 }
52 |             }
53 |         )
54 |     );
55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide023.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Live example: dragongoserver (1) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 23/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Live example: dragongoserver (1)

30 | 31 |
    32 |
  • 33 |

    I'm playing go on http://www.dragongoserver.net/

    34 |
  • 35 |
  • 36 |

    Every time one plays, a textarea is displayed to enable the 37 | sending of a message to their opponent

    38 |
  • 39 |
  • 40 |

    Most of the time, I've got nothing to say, and the textarea is 41 | pushing the submit button away

    42 |
  • 43 |
  • 44 |

    So I removed the offending textarea!

    45 |
        # remove messages from the server
    46 |     $proxy->push_filter(
    47 |         host     => 'www.dragongoserver.net',
    48 |         path     => '^/game.php',
    49 |         response => HTTP::Proxy::BodyFilter::tags->new,
    50 |         response => HTTP::Proxy::BodyFilter::simple->new(
    51 |           sub { ${$_[1]} =~ s|(</?textarea.*>)|<!-- $1 -->| }
    52 |         )
    53 |     );
    54 |
  • 55 |
56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide024.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Live example: dragongoserver (2) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 24/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Live example: dragongoserver (2)

30 | 31 |
    32 |
  • 33 |

    Sometimes I really want to send a message, though...

    34 |
  • 35 |
  • 36 |

    How can the proxy decide when to show the textarea or not?

    37 |
  • 38 |
  • 39 |

    I could add a msg=yes parameter to the query string...

    40 |

    ... which would tell the proxy "I want to send a message"

    41 |

    ... while the origin server would happily ignore it 42 | (the proxy could also remove it)

    43 |
  • 44 |
  • 45 |

    But I'm lazy, and don't want to type in the address bar!

    46 |
  • 47 |
48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide025.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Live example: dragongoserver (3) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 25/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Live example: dragongoserver (3)

30 | 31 |

So I let the proxy do all the work:

32 |
    # remove messages from the server
33 |     my $msg = '&msg=yes';
34 |     $proxy->push_filter(
35 |         host     => 'www.dragongoserver.net',
36 |         path     => '^/game.php',
37 |         response => HTTP::Proxy::BodyFilter::tags->new,
38 |         response => HTTP::Proxy::BodyFilter::simple->new(
39 |             sub { 
40 |                 my ( $self, $dataref, $message, $protocol, $buffer ) = @_;
41 |                 my $uri = $message->request->uri;
42 |                 if( $uri =~ s/$msg//o ) { $msg = '' }
43 |                 else { $$dataref =~ s|(</?textarea.*>)|<!-- $1 -->| }
44 |                 $$dataref =~ s|(Message:)|<a href="$uri$msg">$1</a>|;
45 |             }
46 |         )
47 |     );
48 |

Note that this will fail when the string Message: is split between 49 | two chunks of data sent by the server. I'm lucky.

50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide026.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Live example: dragongoserver (4) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 26/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Live example: dragongoserver (4)

30 | 31 |
    32 |
  • 33 |

    There another interesting page on the server: the status page

    34 |
  • 35 |
  • 36 |

    It lists all the games where it's my turn to play

    37 |
  • 38 |
  • 39 |

    But I have to click on the Status link to refresh the page!

    40 |
  • 41 |
  • 42 |

    Did I tell you I'm lazy?

    43 |
  • 44 |
45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide027.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Live example: dragongoserver (5) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 27/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Live example: dragongoserver (5)

30 | 31 |
    32 |
  • 33 |

    Once again, I use the query string to communicate with the proxy

    34 |
  • 35 |
  • 36 |

    Being nice to the server, the refresh interval grows after time...

    37 |
        # dragongoserver auto refresh
    38 |     # refresh quickly at first, then more slowly
    39 |     $proxy->push_filter(
    40 |         host     => 'www.dragongoserver.net',
    41 |         path     => '^/status.php',
    42 |         response => HTTP::Proxy::HeaderFilter::simple->new(
    43 |             sub {
    44 |                  my ( $self, $headers, $response ) = @_;
    45 |                  ($response->request->uri->query || '') =~ /goreload=(\d+)/;
    46 |                  my $n = ($1 || 0) + 1;
    47 |                  my $delay = $n < 5 ? 30 : $n < 15 ? 60 : $n < 25 ? 300 : 3600;
    48 |                  $headers->push_header(
    49 |                      Refresh => "$delay;url="
    50 |                               . $response->request->uri->path
    51 |                               . "?goreload=$n"
    52 |                  );
    53 |             }
    54 |         )
    55 |     );
    56 |
  • 57 |
  • 58 |

    So, after about one hour, the status page is only reloaded 59 | every hour.

    60 |
  • 61 |
  • 62 |

    When the user clicks on the "Status" link or when the server redirects 63 | to the original status page, the original URL is queried, thus 64 | restarting the counter

    65 |
  • 66 |
67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide028.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Live example: dragongoserver (6) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 28/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Live example: dragongoserver (6)

30 | 31 |
    32 |
  • 33 |

    Now that the page is reloaded automatically, I don't want to keep a 34 | browser window opened on it

    35 |
  • 36 |
  • 37 |

    If the title said that I have games waiting instead of just 38 | "Dragon Go Server - Status", that would be useful

    39 |
  • 40 |
  • 41 |

    Well, it could...

    42 |
  • 43 |
  • 44 |

    I just need to change the title after counting the number of games 45 | awaiting me

    46 |
  • 47 |
  • 48 |

    This requires using the buffering system, since the title appears 49 | before the list of games

    50 |
  • 51 |
52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide030.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Conclusion on dragongoserver 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 30/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Conclusion on dragongoserver

30 | 31 |
    32 |
  • 33 |

    The code regarding only dragongoserver is about 70 lines long

    34 |
  • 35 |
  • 36 |

    I changed the web site to suit my needs

    37 |
  • 38 |
  • 39 |

    Maybe I could have sent an email asking for:

    40 |
      41 |
    • 42 |

      progressive refresh

      43 |
    • 44 |
    • 45 |

      optional messages

      46 |
    • 47 |
    • 48 |

      meaningful title

      49 |
    • 50 |
    51 |
  • 52 |
  • 53 |

    Did I say I'm lazy?

    54 |
  • 55 |
56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide031.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Work in progress (1) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 31/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Work in progress (1)

30 | 31 |

Here is a filter I've created to have a look at my browsing habits 32 | and patterns.

33 |
    #!/usr/bin/perl -w
34 |     use HTTP::Proxy;
35 |     use HTTP::Proxy::HeaderFilter::simple;
36 |     use Fcntl ':flock';
37 |     use strict;
38 |     
39 |     my $file = shift || '-';
40 |     open OUT, ">> $file" or die "Can't open $file: $!";
41 |     
42 |     my $proxy = HTTP::Proxy->new( @ARGV ); # pass any args to the proxy
43 |     
44 |     $proxy->push_filter(
45 |         response => HTTP::Proxy::HeaderFilter::simple->new(
46 |             sub {
47 |                 my ( $self, $headers, $message ) = @_;
48 |     
49 |                 flock( OUT, LOCK_EX );
50 |                 print OUT join( " ",
51 |                       $message->request->headers->header( 'Referer' ) || 'NULL',
52 |                       $message->request->uri,
53 |                       $message->code ), $/;
54 |                 flock( OUT, LOCK_UN );
55 |             }
56 |         )
57 |     );
58 |     
59 |     $proxy->start;
60 |     
61 |     close OUT;
62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide032.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Work in progress (2) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 32/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Work in progress (2)

30 | 31 |
    32 |
  • 33 |

    The proxy logs the Referer:, URI and status code of the request

    34 |
  • 35 |
  • 36 |

    The log file looks like this:

    37 |
        http://www.google.com/search?q=graphviz&sourceid=firefox&start=0&start=0&ie=utf-8&oe=utf-8 http://www.research.att.com/sw/tools/graphviz/ 200
    38 |     NULL http://www.research.att.com/favicon.ico 200
    39 |     http://www.research.att.com/sw/tools/graphviz/ http://www.research.att.com/sw/tools/graphviz/refs.html 200
    40 |     http://www.research.att.com/sw/tools/graphviz/refs.html http://www.research.att.com/footers/CopyrightNotice.gif 404
    41 |     http://www.research.att.com/sw/tools/graphviz/refs.html http://www.research.att.com/~erg/graphviz/info/output.html 200
    42 |     http://search.cpan.org/ http://search.cpan.org/search?query=graphviz&mode=all 200
    43 |     http://search.cpan.org/search?query=graphviz&mode=all http://search.cpan.org/author/LBROCARD/GraphViz-2.00/lib/GraphViz.pm 301
    44 |     http://search.cpan.org/search?query=graphviz&mode=all http://search.cpan.org/~lbrocard/GraphViz-2.00/lib/GraphViz.pm 200
    45 |     http://search.cpan.org/~lbrocard/GraphViz-2.00/lib/GraphViz.pm http://www.research.att.com/sw/tools/graphviz/ 200
    46 | 47 |
  • 48 |
49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide033.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Work in progress (3) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 33/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Work in progress (3)

30 | 31 |

With a little help from the GraphViz module, I can show you this:

32 | 33 | 34 |

3D would be nice, though...

35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide034.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Security considerations 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 34/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Security considerations

30 | 31 |
    32 |
  • 33 |

    By default, the proxy listens on 127.0.0.1

    34 |
  • 35 |
  • 36 |

    If you make it listen on any other interface, you should (OK, must) 37 | add an authentication filter, so as not to become an open proxy

    38 |
  • 39 |
40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide035.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Limitations 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 35/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Limitations

30 | 31 |
    32 |
  • 33 |

    There is no store and forward mechanism

    34 |
  • 35 |
  • 36 |

    The proxy does not detect some lost connections, and the child process 37 | is blocked until LWP::UserAgent times out

    38 |
  • 39 |
  • 40 |

    The CONNECT method is supported (giving access to those https URL), 41 | but the proxy cannot use filters on those connections (no man-in-the-middle 42 | yet)

    43 |
  • 44 |
  • 45 |

    The match criteria are positive ("apply this filter to example.com") 46 | and HTTP::Proxy does not support negative criteria (like "apply this 47 | filter to everything but example.com")

    48 |
  • 49 |
  • 50 |

    It still does not do what I wanted when I started to work on this project...

    51 |

    ... luckily, other people worked on that

    52 |
  • 53 |
54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide036.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Tools using HTTP::Proxy 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 36/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Tools using HTTP::Proxy

30 | 31 |
    32 |
  • 33 |

    HTTP::Recorder, by Linda Julien

    34 |

    The first version of HTTP::Recorder was using filters.

    35 |

    The current implementation is actually a subclass of LWP::UserAgent 36 | that records what the proxy asks for and outputs Perl code that 37 | runs a WWW::Mechanize robot.

    38 |
  • 39 |
  • 40 |

    HTTP::Proxy::BodyFilter::Adnix, by Cosimo Streppone

    41 |

    This is a simple ad-blocker. When receiving an image (image/*) 42 | from a known ad-sending site, it replaces it by another image.

    43 |
  • 44 |
45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide037.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Questions 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 37/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Questions

30 | 31 |

Well, if there's some time left. ;-)

32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /html/talks/ye2004/slide038.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Credits 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 19 | 20 | 38/38 21 | 22 | 25 |
26 | 27 |
28 | 29 |

Credits

30 | 31 |
    32 |
  • 33 |

    A small script using Template-Toolkit, Pod::POM and 34 | Pod::POM::View::HTML::Filter turned an ugly pod-like document into pretty HTML 35 | (I guess everybody writes their own slide tool, anyway)

    36 |
  • 37 |
  • 38 |

    Perl::Tidy coloured the Perl code

    39 |
  • 40 |
  • 41 |

    Syntax::Highlight::HTML coloured the HTML code

    42 |
  • 43 |
  • 44 |

    CSS and a tiny bit of JavaScript made all this viewable and usable

    45 |
  • 46 |
  • 47 |

    Nicholas Clark added all the missing "s" I forgot to type :-)

    48 |
  • 49 |
50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /html/talks/ye2004/wip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/book/HTTP-Proxy/7874e910b9d5ffaf55afc106a3e080149879f50b/html/talks/ye2004/wip.png -------------------------------------------------------------------------------- /lib/HTTP/Proxy/BodyFilter/complete.pm: -------------------------------------------------------------------------------- 1 | package HTTP::Proxy::BodyFilter::complete; 2 | 3 | use strict; 4 | use HTTP::Proxy; 5 | use HTTP::Proxy::BodyFilter; 6 | use vars qw( @ISA ); 7 | @ISA = qw( HTTP::Proxy::BodyFilter ); 8 | use Carp; 9 | 10 | sub filter { 11 | my ( $self, $dataref, $message, $protocol, $buffer ) = @_; 12 | return unless defined $buffer; 13 | 14 | $$buffer = $$dataref; 15 | $$dataref = ""; 16 | } 17 | 18 | sub will_modify { 0 } 19 | 20 | 1; 21 | 22 | __END__ 23 | 24 | =head1 NAME 25 | 26 | HTTP::Proxy::BodyFilter::complete - A filter that passes on a complete body or nothing 27 | 28 | =head1 SYNOPSIS 29 | 30 | use HTTP::Proxy; 31 | use HTTP::Proxy::BodyFilter::simple; 32 | use HTTP::Proxy::BodyFilter::complete; 33 | 34 | my $proxy = HTTP::Proxy->new; 35 | 36 | # pass the complete response body to our filter (in one pass) 37 | $proxy->push_filter( 38 | mime => 'text/html', 39 | response => HTTP::Proxy::BodyFilter::complete->new, 40 | response => HTTP::Proxy::BodyFilter::simple->new( 41 | sub { 42 | my ( $self, $dataref, $message, $protocol, $buffer ) = @_; 43 | # some complex processing that needs 44 | # the whole response body 45 | } 46 | ); 47 | ); 48 | 49 | $proxy->start; 50 | 51 | =head1 DESCRIPTION 52 | 53 | The L filter will ensure that the next 54 | filter in the filter chain will only receive complete message bodies 55 | (either request or response). 56 | 57 | It will store the chunks of data as they arrive, only to pass the B 58 | message body after the whole message has been received by the proxy. 59 | 60 | Subsequent filters is the chain will receive the whole body as a big 61 | piece of data. 62 | 63 | =head1 CAVEAT EMPTOR 64 | 65 | This consumes memory and time. 66 | 67 | Use with caution, otherwise your client will timeout, or your proxy will 68 | run out of memory. 69 | 70 | Also note that all filters after C are still called when the 71 | proxy receives data: they just receive empty data. They will receive 72 | the complete data when the filter chain is called for the very last time 73 | (the C<$buffer> parameter is C). (See the documentation of 74 | L for details about the C<$buffer> parameter.) 75 | 76 | =head1 METHOD 77 | 78 | This filter defines two methods, called automatically: 79 | 80 | =over 4 81 | 82 | =item filter() 83 | 84 | Stores the incoming data in memory until the last moment and passes 85 | empty data to the subsequent filters in the chain. They will receive 86 | the full body during the last round of filter calls. 87 | 88 | =item will_modify() 89 | 90 | This method returns a I value, thus indicating to the system 91 | that it will not modify data passing through. 92 | 93 | =back 94 | 95 | =head1 AUTHOR 96 | 97 | Philippe "BooK" Bruhat, Ebook@cpan.orgE. 98 | 99 | =head1 THANKS 100 | 101 | Thanks to Simon Cozens and Merijn H. Brandt, who needed this almost at 102 | the same time. C<;-)> 103 | 104 | =head1 COPYRIGHT 105 | 106 | Copyright 2004-2015, Philippe Bruhat. 107 | 108 | =head1 LICENSE 109 | 110 | This module is free software; you can redistribute it or modify it under 111 | the same terms as Perl itself. 112 | 113 | =cut 114 | 115 | -------------------------------------------------------------------------------- /lib/HTTP/Proxy/BodyFilter/htmltext.pm: -------------------------------------------------------------------------------- 1 | package HTTP::Proxy::BodyFilter::htmltext; 2 | 3 | use strict; 4 | use Carp; 5 | use HTTP::Proxy::BodyFilter; 6 | use vars qw( @ISA ); 7 | @ISA = qw( HTTP::Proxy::BodyFilter ); 8 | 9 | sub init { 10 | croak "Parameter must be a CODE reference" unless ref $_[1] eq 'CODE'; 11 | $_[0]->{_filter} = $_[1]; 12 | } 13 | 14 | sub begin { $_[0]->{js} = 0; } # per message initialisation 15 | 16 | sub filter { 17 | my ( $self, $dataref, $message, $protocol, $buffer ) = @_; 18 | 19 | my $pos = pos($$dataref) = 0; 20 | SCAN: 21 | { 22 | $pos = pos($$dataref); 23 | $$dataref =~ /\G<\s*(?:script|style)[^>]*>/cgi # protect 24 | && do { $self->{js} = 1; redo SCAN; }; 25 | $$dataref =~ /\G<\s*\/\s*(?:script|style)[^>]*>/cgi # unprotect 26 | && do { $self->{js} = 0; redo SCAN; }; 27 | # comments are considered as text 28 | # if you want comments as comments, 29 | # use HTTP::Proxy::BodyFilter::htmlparser 30 | $$dataref =~ /\G > <>><', '', ' > <>>', '<'], 16 | # the following fails because of the implementation of the tags.pm 17 | # a stronger implementation requires parsing 18 | # [ 'xfilter( \$data, undef, undef, 23 | ( defined $buffer ? \$buffer : undef ) ); 24 | is( $data, $_->[2], "Correct data" ); 25 | is( $buffer, $_->[3], "Correct buffer" ); 26 | } 27 | 28 | -------------------------------------------------------------------------------- /t/66htmlparser.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use Test::More; 3 | 4 | BEGIN { 5 | if ( eval "use HTML::Parser; 1;" ) { 6 | plan tests => 5; 7 | } 8 | else { 9 | plan skip_all => 'HTML::Parser not installed'; 10 | } 11 | } 12 | 13 | use HTTP::Proxy; 14 | use HTTP::Proxy::BodyFilter::htmlparser; 15 | 16 | my @results = ( 17 | [ 18 | '

Test

\n

foo
bar

', 19 | '

Test

\n

foo
bar

', 20 | { start => 4, end => 3 } 21 | ], 22 | [ 23 | '

Test

\n

foo
bar

', 24 | '


', 25 | { start => 4, end => 3 } 26 | ], 27 | ); 28 | 29 | my $filter; 30 | my $count; 31 | 32 | # bad initialisation 33 | eval { $filter = HTTP::Proxy::BodyFilter::htmlparser->new("foo"); }; 34 | like( $@, qr/^First parameter must be a HTML::Parser/, "Test constructor" ); 35 | 36 | my $p = HTML::Parser->new; 37 | $p->handler( start => \&start, "self,text" ); 38 | $p->handler( end => \&end, "self,text" ); 39 | $p->handler( start_document => \&start_document, "" ); 40 | 41 | # the handlers 42 | sub start_document { $count = {} } 43 | sub start { $count->{start}++; $_[0]->{output} .= $_[1] } 44 | sub end { $count->{end}++; $_[0]->{output} .= $_[1] } 45 | 46 | # read-only filter 47 | my $data = shift @results; 48 | $filter = HTTP::Proxy::BodyFilter::htmlparser->new($p); 49 | $filter->filter( \$data->[0], undef, undef, undef ); 50 | is_deeply( $data->[0], $data->[1], "Data not modified" ); 51 | is_deeply( $data->[2], $count, "Correct number of start and end events" ); 52 | 53 | # read-write filter (yeah, it's the same) 54 | $data = shift @results; 55 | $filter = HTTP::Proxy::BodyFilter::htmlparser->new( $p, rw => 1 ); 56 | $filter->filter( \$data->[0], undef, undef, undef ); 57 | is_deeply( $data->[0], $data->[1], "Data modified" ); 58 | is_deeply( $data->[2], $count, "Correct number of start and end events" ); 59 | 60 | -------------------------------------------------------------------------------- /t/67complete.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use Test::More; 4 | use HTTP::Proxy; 5 | use HTTP::Proxy::BodyFilter::complete; 6 | use HTTP::Proxy::BodyFilter::simple; 7 | 8 | my @data = ( 9 | 'miny hollers the let tiger catch meeny a he him', 10 | 'joy beamish flame gyre o blade came callay jaws vorpal', 11 | 'xvi vigor nvi Bvi trived Elvis levee viper e3 PVIC', 12 | 'Wizzle Hunny_Bee Alexander_Beetle Owl Woozle Eeyore Backson', 13 | 'necessitatibus lorem aperiam facere consequuntur incididunt similique' 14 | ); 15 | my $full = join '', @data; 16 | 17 | plan tests => 1 + @data; 18 | 19 | # some variables 20 | my $proxy = HTTP::Proxy->new( port => 0 ); 21 | $proxy->push_filter( 22 | response => HTTP::Proxy::BodyFilter::complete->new(), 23 | response => HTTP::Proxy::BodyFilter::simple->new( 24 | sub { 25 | my ( $self, $dataref, $message, $protocol, $buffer ) = @_; 26 | if ( defined $buffer ) { 27 | is( $$dataref, '', 'Empty chunk of data' ); 28 | } 29 | else { 30 | is( $$dataref, $full, 'Full data in one big chunk' ); 31 | } 32 | } 33 | ), 34 | ); 35 | 36 | # set up a fake request/response set 37 | my $res = 38 | HTTP::Response->new( 200, 'OK', 39 | HTTP::Headers->new( 'Content-Type' => 'text/html' ), 'dummy' ); 40 | $res->request( HTTP::Request->new( GET => 'http://www.example.com/' ) ); 41 | $proxy->request( $res->request ); 42 | $proxy->response($res); 43 | 44 | # run the data through the filters 45 | $proxy->{body}{response}->select_filters($res); 46 | 47 | for my $data (@data) { 48 | $proxy->{body}{response}->filter( \$data, $res, '' ); 49 | } 50 | 51 | # finalize 52 | my $data = ''; 53 | $proxy->{body}{response}->filter_last( \$data, $res, '' ); 54 | 55 | -------------------------------------------------------------------------------- /t/71rot13.t: -------------------------------------------------------------------------------- 1 | use Test::More tests => 4; 2 | use HTTP::Proxy; 3 | use HTTP::Proxy::BodyFilter::tags; 4 | use HTTP::Proxy::BodyFilter::htmltext; 5 | use lib 't/lib'; 6 | use ProxyUtils; 7 | use strict; 8 | 9 | # a very simple proxy 10 | my $proxy = HTTP::Proxy->new( port => 0 ); 11 | 12 | $proxy->push_filter( 13 | mime => 'text/html', 14 | response => HTTP::Proxy::BodyFilter::tags->new, 15 | response => HTTP::Proxy::BodyFilter::htmltext->new( 16 | sub { tr/a-zA-z/n-za-mN-ZA-M/ } 17 | ) 18 | ); 19 | 20 | # get and test the filter stack 21 | my $stack = $proxy->_filter_stack( 22 | body => 'response', 23 | HTTP::Request->new( GET => 'http://foo.com/bar.html' ), 24 | HTTP::Response->new( 25 | 200, "OK", HTTP::Headers->new( 'Content-Type' => 'text/html' ) 26 | ) 27 | ); 28 | 29 | for ( 30 | [ "abc", "nop" ], 31 | [ "100 € is expensive", "100 € vf rkcrafvir" ], 32 | [ " <-- here ", " <-- urer " ], 33 | [ 34 | qq'\n foo', 35 | qq'\n sbb', 36 | ], 37 | ) 38 | { 39 | my $data = "$_->[0]"; 40 | $stack->select_filters( $proxy->{response} ); 41 | $stack->filter( \$data, $proxy->{response}, undef ); 42 | is( $data, $_->[1], "Correct data transformation" ); 43 | } 44 | 45 | -------------------------------------------------------------------------------- /t/90httpstatus.t: -------------------------------------------------------------------------------- 1 | # good place for web client tests: 2 | # http://diveintomark.org/tests/client/http/ 3 | 4 | use strict; 5 | my @url; 6 | my $tests; 7 | 8 | BEGIN { 9 | @url = ( 10 | map ( [ "$_" => 0 + $_ ], 200 .. 206, 300, 304, 306 ), 11 | map ( [ "$_" => 0 + $_, 200 ], 301 .. 303, 305, 307 ), 12 | map ( [ "$_" => 0 + $_ ], 400 .. 418, 500 .. 505 ), 13 | ); 14 | $tests += @$_ - 1 for @url; 15 | } 16 | 17 | use Test::More; 18 | use HTTP::Proxy; 19 | use HTTP::Request::Common; 20 | use lib 't/lib'; 21 | use ProxyUtils; 22 | 23 | my $base = 'http://httpstat.us'; 24 | 25 | plan tests => $tests; 26 | 27 | SKIP: 28 | { 29 | skip "$base is not available", $tests unless web_ok($base); 30 | 31 | # $tests + 2, because of the duplicate 401 32 | my $proxy = HTTP::Proxy->new( 33 | port => 0, 34 | max_keep_alive_requests => $tests, 35 | max_connections => 1, 36 | ); 37 | $proxy->init; 38 | 39 | my $ua = LWP::UserAgent->new( keep_alive => 1 ); 40 | $ua->proxy( http => $proxy->url ); 41 | 42 | # fork the proxy 43 | my $pid = fork_proxy($proxy); 44 | 45 | # check all those pages 46 | for (@url) { 47 | my ( $doc, $status, $status2 ) = @$_; 48 | my $res = $ua->simple_request( GET "$base/$doc" ); 49 | is( $res->code, $status, "$doc => $status " . $res->message ); 50 | 51 | # redirection 52 | if ( $res->is_redirect && $status2 ) { 53 | $res = $ua->simple_request( GET $res->header('Location') ); 54 | is( $res->code, $status2, "$doc => $status2 (redirect)" ); 55 | } 56 | } 57 | 58 | # wait for the proxy 59 | wait; 60 | } 61 | -------------------------------------------------------------------------------- /t/README: -------------------------------------------------------------------------------- 1 | README file for the HTTP::Proxy tests 2 | 3 | * Helper modules 4 | 5 | HTTP::Proxy can test itself without using the network, 6 | thanks to the HTTP::Daemon module. 7 | 8 | But since I want to test the proxy against "real" servers, 9 | I also need to test it with an internet connection. 10 | 11 | localhost tests work as follows: 12 | - a HTTP::Daemon is created and forked, that will serve 13 | a certain number of simple requests 14 | - a HTTP::Proxy is created and forked 15 | - a LWP::UserAgent is created and connects to the proxy 16 | - each of those process can run its own tests independantly, 17 | thanks to Test::More 18 | 19 | The t/lib/ProxyUtils.pm file (`use lib 't/lib'; use ProxyUtils;` in some test files) 20 | exports several functions: 21 | - server_start() 22 | starts a new HTTP::Daemon 23 | - server_next( [ \&answer ] ) 24 | returns the next response from the server (accepts a coderef) 25 | - fork_proxy( $proxy, [ \&end ] ) 26 | fork a proxy server passed as an argument, with an optionnel 27 | subroutine to run at the end 28 | - web_ok() 29 | test if the actual WWW is available for testing 30 | - bare_request( $url, $headers, $proxy ) 31 | send a simple request through the proxy without LWP::UA 32 | return a string containing the full response 33 | 34 | * Test categories 35 | 36 | The tests are prefixed with a number, which indicates several categories: 37 | 38 | 0x - Basic tests 39 | t/00basic.t - use HTTP::Proxy works 40 | t/01pod.t - the POD is correct 41 | t/05new.t - the HTTP::Proxy constructor 42 | 43 | 1x - Minimal functionnality tests 44 | t/10init.t - the proxy initialisation 45 | t/11log.t - the log() and logmask() methods 46 | t/15accessors.t - the proxy accessors 47 | t/17fstack.t - the internal HTTP::Proxy::FilterStack object 48 | 49 | 2x - Network protocols test 50 | t/20dummy.t - tests against a dummy web server 51 | t/20keepalive.t - test the keep-alive connections 52 | t/22http.t - test actual HTTP servers 53 | t/22transparent.t - test transparent proxying 54 | t/23connect.t - test CONNECT to a ssh server 55 | t/23https.t - test CONNECT for SSL 56 | 57 | 3x - (Reserved for future use) 58 | 59 | 4x - Filter-related functions 60 | t/40push_filters.t - the push_filter method 61 | 62 | 5x - Internal header filters 63 | t/50hopbyhop.t - check hop-by-hop headers removal 64 | t/50standard.t - check other headers removal 65 | t/50via.t - check the Via: headers 66 | t/51simple.t - HTTP::Proxy::HeaderFilter::simple 67 | t/51simple2.t - HTTP::Proxy::HeaderFilter::simple with a real proxy 68 | 69 | 6x - Internal body filters 70 | t/61simple.t - HTTP::Proxy::BodyFilter::simple 71 | t/61simple2.t - HTTP::Proxy::BodyFilter::simple with a real proxy 72 | t/64htmltext.t - HTTP::Proxy::BodyFilter::htmltext 73 | t/64lines.t - HTTP::Proxy::BodyFilter::lines 74 | t/64tags.t - HTTP::Proxy::BodyFilter::tags 75 | t/66htmlparser.t - HTTP::Proxy::BodyFilter::htmlparser 76 | 77 | 7x - Complex filter chains 78 | t/71rot13.t - a simple ROT13 filter set 79 | 80 | 8x - (Reserved for future use) 81 | 82 | 9x - miscellaneous tests 83 | t/90diveintomark.t - test the proxy against a lot of status codes 84 | 85 | -------------------------------------------------------------------------------- /t/lib/ProxyUtils.pm: -------------------------------------------------------------------------------- 1 | package ProxyUtils; 2 | 3 | use strict; 4 | use Exporter (); 5 | use IO::Socket::INET; 6 | use vars qw( @ISA @EXPORT @EXPORT_OK ); 7 | 8 | @ISA = qw( Exporter ); 9 | @EXPORT = qw( &server_start &server_next &fork_proxy &web_ok &bare_request ); 10 | @EXPORT_OK = @EXPORT; 11 | 12 | use HTTP::Daemon; 13 | use LWP::UserAgent; 14 | 15 | # start a simple server 16 | sub server_start { 17 | 18 | # create a HTTP::Daemon (on an available port) 19 | my $daemon = HTTP::Daemon->new( 20 | LocalHost => 'localhost', 21 | ReuseAddr => 1, 22 | ) 23 | or die "Unable to start web server"; 24 | return $daemon; 25 | } 26 | 27 | # This must NOT be called in an OO fashion but this way: 28 | # server_next( $server, $coderef, ... ); 29 | # 30 | # The optional coderef takes a HTTP::Request as its first argument 31 | # and returns a HTTP::Response. The rest of server_next() arguments 32 | # are passed to &$anwser; 33 | 34 | sub server_next { 35 | my $daemon = shift; 36 | my $answer = shift; 37 | 38 | # get connection data 39 | my $conn = $daemon->accept; 40 | my $req = $conn->get_request; 41 | 42 | # compute some answer 43 | my $rep; 44 | if ( ref $answer eq 'CODE' ) { 45 | $rep = $answer->( $req, @_ ); 46 | } 47 | else { 48 | $rep = HTTP::Response->new( 49 | 200, 'OK', 50 | HTTP::Headers->new( 'Content-Type' => 'text/plain' ), 51 | sprintf( "You asked for
%s", ( $req->uri ) x 2 ) 52 | ); 53 | } 54 | 55 | $conn->send_response($rep); 56 | $conn->close; 57 | } 58 | 59 | # run a stand-alone proxy 60 | # the proxy accepts an optional coderef to run after serving all requests 61 | sub fork_proxy { 62 | my $proxy = shift; 63 | my $sub = shift; 64 | 65 | my $pid = fork; 66 | die "Unable to fork proxy" if not defined $pid; 67 | 68 | if ( $pid == 0 ) { 69 | $0 .= " (proxy)"; 70 | 71 | # this is the http proxy 72 | $proxy->start; 73 | $sub->() if ( defined $sub and ref $sub eq 'CODE' ); 74 | exit 0; 75 | } 76 | 77 | # back to the parent 78 | return $pid; 79 | } 80 | 81 | # check that the web connection is working 82 | sub web_ok { 83 | my $ua = LWP::UserAgent->new( env_proxy => 1, timeout => 30 ); 84 | my $res = 85 | $ua->request( 86 | HTTP::Request->new( GET => shift||'http://www.google.com/intl/en/' ) ); 87 | return $res->is_success; 88 | } 89 | 90 | # send a simple request without LWP::UA 91 | # bare_request($url, $headers, $proxy) 92 | sub bare_request { 93 | my ($url, $headers, $proxy) = @_; 94 | 95 | # connect directly to the proxy 96 | $proxy->url() =~ /:(\d+)/; 97 | my $sock = IO::Socket::INET->new( 98 | PeerAddr => 'localhost', 99 | PeerPort => $1, 100 | Proto => 'tcp' 101 | ) or do { warn "Can't connect to the proxy"; return ""; }; 102 | 103 | # send the request 104 | print $sock "GET $url HTTP/1.0\015\012", 105 | $headers->as_string( "\015\012" ), "\015\012"; 106 | my $content = join "", <$sock>; 107 | 108 | # close the connection to the proxy 109 | close $sock or warn "close: $!"; 110 | return $content; 111 | } 112 | 113 | package HTTP::Proxy; 114 | 115 | # return the requested internal filter stack 116 | # _filter_stack( body|header, request|response, HTTP::Message ) 117 | sub _filter_stack { 118 | my ( $self, $part, $mesg ) = splice( @_, 0, 3 ); 119 | die "No <$part><$mesg> filter stack" 120 | unless $part =~ /^(?:header|body)$/ 121 | and $mesg =~ /^(?:request|response)$/; 122 | 123 | for (@_) { 124 | die "$_ is not a HTTP::Request or HTTP::Response" 125 | unless ( ref $_ ) =~ /^HTTP::(Request|Response)$/; 126 | $self->{ lc $1 } = $_; 127 | } 128 | $self->{response}->request( $self->{request} ); 129 | return $self->{$part}{$mesg}; 130 | } 131 | 132 | -------------------------------------------------------------------------------- /t/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | This is an HTML test file 4 | 5 | 6 |

Test title

7 |

Test sub-title

8 |

9 | This is a test paragraph, 10 | with bold and emphasised 11 | text. 12 |

13 |
14 |

Another test

15 |
16 |    # some source code
17 |    $a++
18 |   
19 |
20 | 21 | --------------------------------------------------------------------------------