# NOPOD NOTICE: the documentation and whitespace have been stripped # from this file in order to reduce filesize. # #!/usr/bin/perl # LW2 version 2.4 # # LW2 copyright 2000-2006 by rain forest puppy, rfp.labs # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # package LW2; $LW2::VERSION="2.4"; $PACKAGE='LW2'; BEGIN { package LW2; $PACKAGE='LW2'; $LW_SSL_LIB = 0; $LW_SSL_KEEPALIVE = 0; $LW_NONBLOCK_CONNECT = 1; $_SSL_LIBRARY = undef; eval "use Socket"; if ( !$@ ) { eval "use Net::SSLeay"; # do we have SSL support? if ( !$@ ) { $LW_SSL_LIB = 1; $_SSL_LIBRARY = 'Net::SSLeay'; Net::SSLeay::load_error_strings(); Net::SSLeay::SSLeay_add_ssl_algorithms(); Net::SSLeay::randomize(); } else { eval "use Net::SSL"; if ( !$@ ) { $LW_SSL_LIB = 2; $_SSL_LIBRARY = 'Net::SSL'; } } if ( $^O !~ /Win32/ ) { eval "use POSIX qw(:errno_h :fcntl_h)"; if ($@) { $LW_NONBLOCK_CONNECT = 0; } } else { *EINPROGRESS = sub { 10036 }; *EWOULDBLOCK = sub { 10035 }; } } } # BEGIN sub auth_brute_force { my ( $auth_method, $hrin, $user, $pwordref, $dom, $fail_code ) = @_; my ( $P, %hout ); $fail_code ||= 401; return undef if ( !defined $auth_method || length($auth_method) == 0 ); return undef if ( !defined $user || length($user) == 0 ); return undef if ( !( defined $hrin && ref($hrin) ) ); return undef if ( !( defined $pwordref && ref($pwordref) ) ); map { ( $P = $_ ) =~ tr/\r\n//d; auth_set( $auth_method, $hrin, $user, $P, $dom ); return undef if ( http_do_request( $hrin, \%hout ) ); return $P if ( $hout{whisker}->{code} != $fail_code ); } @$pwordref; return undef; } sub auth_unset { my $href = shift; return if ( !defined $href || !ref($href) ); delete $$href{Authorization}; delete $$href{'Proxy-Authorization'}; delete $$href{whisker}->{auth_callback}; delete $$href{whisker}->{auth_proxy_callback}; delete $$href{whisker}->{auth_data}; delete $$href{whisker}->{auth_proxy_data}; } sub auth_set { my ( $method, $href, $user, $pass, $domain ) = ( lc(shift), @_ ); return if ( !( defined $href && ref($href) ) ); return if ( !defined $user || !defined $pass ); if ( $method eq 'basic' ) { $$href{'Authorization'} = 'Basic ' . encode_base64( $user . ':' . $pass, '' ); } if ( $method eq 'proxy-basic' ) { $$href{'Proxy-Authorization'} = 'Basic ' . encode_base64( $user . ':' . $pass, '' ); } if ( $method eq 'ntlm' ) { http_close($href); $$href{whisker}->{auth_data} = ntlm_new( $user, $pass, $domain ); $$href{whisker}->{auth_callback} = \&_ntlm_auth_callback; } if ( $method eq 'proxy-ntlm' ) { utils_croak('',"auth_set: proxy-ntlm auth w/ SSL not currently supported") if ( $href->{whisker}->{ssl} > 0 ); http_close($href); $$href{whisker}->{auth_proxy_data} = ntlm_new( $user, $pass, $domain ); $$href{whisker}->{auth_proxy_callback} = \&_ntlm_auth_proxy_callback; } } sub cookie_new_jar { return {}; } sub cookie_read { my ( $count, $jarref, $hrs, $hrq, $rej ) = ( 0, @_ ); return 0 if ( !( defined $jarref && ref($jarref) ) ); return 0 if ( !( defined $hrs && ref($hrs) ) ); return 0 if ( !( defined $$hrs{whisker}->{cookies} && ref( $$hrs{whisker}->{cookies} ) ) ); my @opt; if(defined $hrq && ref($hrq)){ push @opt, $hrq->{whisker}->{host}; my $u = $hrq->{whisker}->{uri}; $u=~s#/.*?$##; $u='/' if($u eq ''); push @opt, $u, $rej; } foreach ( @{ $hrs->{whisker}->{cookies} } ) { cookie_parse( $jarref, $_ , @opt); $count++; } return $count; } sub cookie_parse { my ( $jarref, $header ) = (shift, shift); my ( $Dd, $Dp, $R ) = (shift, shift, shift||0); return if ( !( defined $jarref && ref($jarref) ) ); return if ( !( defined $header && length($header) > 0 ) ); my @C = ( undef, undef, undef, undef, 0 ); $header =~ tr/\r\n//d; my ($f,%seen,$n,$t) = (1); while( length($header) ){ $header =~ s/^[ \t]+//; last if(!($header =~ s/^([^ \t=;]+)//)); my $an = lc($1); my $av = undef; $header =~ s/^[ \t]+//; if(substr($header,0,1) eq '='){ $header=~s/^=[ \t]*//; if(substr($header,0,1) eq '"'){ my $p = index($header,'"',1); last if($p == -1); $av = substr($header,1,$p-1); substr($header,0,$p+1)=''; } else { $av = $1 if($header =~ s/^([^ \t;,]*)//); } } else { my $p = index($header,';'); substr($header,0,$p)=''; } $header =~ s/^.*?;//; if($f){ return if(!defined $av); ($f,$n,$C[0])=(0,$an,$av); } else { $seen{$an}=$av if(!exists $seen{$an}); } } return if(!defined $n || $n eq ''); my $del = 0; $del++ if($C[0] eq ''); $del++ if(defined $seen{'max-age'} && $seen{'max-age'} eq '0'); if($del){ delete $$jarref{$n} if exists $$jarref{$n}; return; } if(defined $seen{domain} && $seen{domain} ne ''){ $t = $seen{domain}; $t='.'.$t if(substr($t,0,1) ne '.' && !_is_ip_address($t)); } else { $t=$Dd; } $t=~s/\.+$// if(defined $t); $C[1]=$t; if(defined $seen{path}){ $t = $seen{path}; } else { $t=$Dp || '/'; } $t=~s#/+$##; $t='/' if(substr($t,0,1) ne '/'); $C[2]=$t; $C[4]=1 if(exists $seen{secure}); return if($R && !_is_valid_cookie_match($C[1], $C[2], $Dd, $Dp)); $$jarref{$n} = \@C; } sub _is_ip_address { my $n = shift; return 1 if($n=~/^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$/); return 0; } sub _is_valid_cookie_match { my ($cd, $cp, $td, $tp) = @_; return 0 if(index($tp,$cp)!=0); if(substr($cd,0,1) eq '.'){ if( $td =~ /(.+)$cd$/ ){ return 1 if(index($1,'.') == -1); } return 0; } else { return 0 if($cd ne $td); } return 1; } sub cookie_write { my ( $jarref, $hin, $override ) = @_; my ( $name, $out ) = ( '', '' ); return if ( !( defined $jarref && ref($jarref) ) ); return if ( !( defined $hin && ref($hin) ) ); $override = $override || 0; $$hin{'whisker'}->{'ssl'} = $$hin{'whisker'}->{'ssl'} || 0; foreach $name ( keys %$jarref ) { next if ( $name eq '' ); if($override){ $out .= "$name=$$jarref{$name}->[0];"; next; } next if ( $$hin{'whisker'}->{'ssl'} == 0 && $$jarref{$name}->[4] > 0 ); if ( $$hin{'whisker'}->{'host'} =~ /$$jarref{$name}->[1]$/i && $$hin{'whisker'}->{'uri'} =~ /^$$jarref{$name}->[2])/ ) { $out .= "$name=$$jarref{$name}->[0];"; } } if ( $out ne '' ) { $$hin{'Cookie'} = $out; } } sub cookie_get { my ( $jarref, $name ) = @_; return undef if ( !( defined $jarref && ref($jarref) ) ); if ( defined $$jarref{$name} ) { return @{ $$jarref{$name} }; } return undef; } sub cookie_get_names { my ( $jarref, $name ) = @_; return undef if ( !( defined $jarref && ref($jarref) ) ); return keys %$jarref; } sub cookie_get_valid_names { my ( $jarref, $domain, $url, $ssl ) = @_; return () if ( !( defined $jarref && ref($jarref) ) ); return () if ( !defined $domain || $domain eq '' ); return () if ( !defined $url || $url eq '' ); $ssl ||= 0; my (@r, $name); foreach $name ( keys %$jarref ) { next if ( $name eq '' ); next if ( $$jarref{$name}->[4] > 0 && $ssl == 0 ); if ( $domain =~ /$$jarref{$name}->[1]$/i && $url =~ /^$$jarref{$name}->[2])/i ) { push @r, $name; } } return @r; } sub cookie_set { my ( $jarref, $name, $value, $domain, $path, $expire, $secure ) = @_; my @construct; return if ( !( defined $jarref && ref($jarref) ) ); return if ( $name eq '' ); if ( !defined $value || $value eq '' ) { delete $$jarref{$name}; return; } $path = $path || '/'; $secure = $secure || 0; @construct = ( $value, $domain, $path, undef, $secure ); $$jarref{$name} = \@construct; } %_crawl_config = ( 'save_cookies' => 0, 'reuse_cookies' => 1, 'save_offsites' => 0, 'save_non_http' => 0, 'follow_moves' => 1, 'url_limit' => 1000, 'use_params' => 0, 'params_double_record' => 0, 'skip_ext' => { gif => 1, jpg => 1, png => 1, gz => 1, swf => 1, pdf => 1, zip => 1, wav => 1, mp3 => 1, asf => 1, tgz => 1 }, 'save_skipped' => 0, 'save_referrers' => 0, 'use_referrers' => 1, 'do_head' => 0, 'callback' => 0, 'netloc_bug' => 1, 'normalize_uri' => 1, 'source_callback' => 0 ); %_crawl_linktags = ( 'a' => 'href', 'applet' => [qw(codebase archive code)], 'area' => 'href', 'base' => 'href', 'bgsound' => 'src', 'blockquote' => 'cite', 'body' => 'background', 'del' => 'cite', 'embed' => [qw(src pluginspage)], 'form' => 'action', 'frame' => [qw(src longdesc)], 'iframe' => [qw(src longdesc)], 'ilayer' => 'background', 'img' => [qw(src lowsrc longdesc usemap)], 'input' => [qw(src usemap)], 'ins' => 'cite', 'isindex' => 'action', 'head' => 'profile', 'layer' => [qw(background src)], 'link' => 'href', 'object' => [qw(codebase data archive usemap)], 'q' => 'cite', 'script' => 'src', 'table' => 'background', 'td' => 'background', 'th' => 'background', 'xmp' => 'href', ); sub crawl_new { my ( $start, $depth, $reqref, $trackref ) = @_; my %X; return undef if ( !defined $start || !defined $depth ); return undef if ( !defined $reqref || !ref($reqref) ); $trackref = {} if ( !defined $trackref || !ref($trackref) ); $X{track} = $trackref; $X{request} = $reqref; $X{depth} = $depth || 2; $X{start} = $start; $X{magic} = 7340; $X{reset} = sub { $X{errors} = []; # all errors encountered $X{urls} = []; # temp; used to hold all URLs on page $X{server_tags} = {}; # all server tags found $X{referrers} = {}; # who refers to what URLs $X{offsites} = {}; # all URLs that point offsite $X{response} = {}; # temp; the response hash $X{non_http} = {}; # all non_http URLs found $X{cookies} = {}; # all cookies found $X{forms} = {}; # all forms found $X{jar} = {}; # temp; cookie jar $X{url_queue} = []; # temp; URLs to still fetch $X{config} = {}; %{ $X{config} } = %_crawl_config; %{ $X{track} } = (); $X{parsed_page_count} = 0; }; $X{crawl} = sub { crawl( \%X, @_ ) }; $X{reset}->(); return \%X; } { # START OF CRAWL CONTAINER sub crawl { my ( $C, $START, $MAX_DEPTH ) = @_; return undef if ( !defined $C || !ref($C) || $C->{magic} != 7340 ); my $CONFIG = $C->{config}; my $TRACK = $C->{track}; my $URLS = $C->{urls}; my $RESP = $C->{response}; my $REQ = $C->{request}; my $Q = $C->{url_queue}; $START ||= $C->{start}; $C->{depth} = $MAX_DEPTH || $C->{depth}; my ( $COUNT, $T, @ST ) = ( 0, '' ); my @v = uri_split($START); my $error = undef; $error = 'Start protocol not http or https' if ( $v[1] ne 'http' && $v[1] ne 'https' ); $error = 'Bad start host' if ( !defined $v[2] || $v[2] eq '' ); push( @{ $C->{errors} }, $error ) && return undef if ( defined $error ); @ST = ( $v[2], $v[3], $v[0], 1, '', '' ); $REQ->{whisker}->{ssl} = 1 if ( $v[1] eq 'https' ); $REQ->{whisker}->{host} = $ST[0]; $REQ->{whisker}->{port} = $ST[1]; $REQ->{whisker}->{lowercase_incoming_headers} = 1; $REQ->{whisker}->{ignore_duplicate_headers} = 0; delete $REQ->{whisker}->{parameters}; http_fixup_request($REQ); push @$Q, \@ST; while (@$Q) { @ST = @{ shift @$Q }; next if ( defined $TRACK->{ $ST[2] } && $TRACK->{ $ST[2] } ne '?' ); if ( $ST[3] > $C->{depth} ) { $TRACK->{ $ST[2] } = '?' if ( $CONFIG->{save_skipped} > 0 ); next; } $ST[4] = uri_get_dir( $ST[2] ); $REQ->{whisker}->{uri} = $ST[2]; if ( $ST[5] ne '' && $CONFIG->{use_referrers} > 0 ) { $REQ->{Referrer} = $ST[5]; } my $result = _crawl_do_request( $REQ, $RESP, $C ); if ( $result == 1 || $result == 2 ) { push @{ $C->{errors} }, "$ST[2]: $RESP->{whisker}->{error}"; next; } $COUNT++; $TRACK->{ $ST[2] } = $RESP->{whisker}->{code} if ( $result == 0 || $result == 4 ); $TRACK->{ $ST[2] } = '?' if ( ( $result == 3 || $result == 5 ) && $CONFIG->{save_skipped} > 0 ); if ( defined $RESP->{server} && !ref( $RESP->{server} ) ) { $C->{server_tags}->{ $RESP->{server} }++; } if ( defined $RESP->{'set-cookie'} ) { if ( $CONFIG->{save_cookies} > 0 ) { if ( ref( $RESP->{'set-cookie'} ) ) { $C->{cookies}->{$_}++ foreach ( @{ $RESP->{'set-cookie'} } ); } else { $C->{cookies}->{ $RESP->{'set-cookie'} }++; } } cookie_read( $C->{jar}, $RESP ) if ( $CONFIG->{reuse_cookies} > 0 ); } next if ( $result == 4 || $result == 5 ); next if ( scalar @$Q > $CONFIG->{url_limit} ); if ( $result == 0 ) { # page should be parsed if ( $CONFIG->{source_callback} != 0 && ref( $CONFIG->{source_callback} ) eq 'CODE' ) { &{ $CONFIG->{source_callback} }($C); } html_find_tags( \$RESP->{whisker}->{data}, \&_crawl_extract_links_test, 0, $C, \%_crawl_linktags ); $C->{parsed_page_count}++; } push @$URLS, $RESP->{location} if ( $result == 3 ); foreach $T (@$URLS) { $T =~ tr/\0\r\n//d; next if ( length($T) == 0 ); next if ( $T =~ /^#/i ); # fragment push @{ $C->{referrers}->{$T} }, $ST[2] if ( $CONFIG->{save_referrers} > 0 ); if ( $T =~ /^([a-zA-Z0-9]*):/ && lc($1) ne 'http' && lc($1) ne 'https' ) { push @{ $C->{non_http}->{$T} }, $ST[2] if ( $CONFIG->{save_non_http} > 0 ); next; } if ( substr( $T, 0, 2 ) eq '//' && $CONFIG->{netloc_bug} > 0 ) { if ( $REQ->{whisker}->{ssl} > 0 ) { $T = 'https:' . $T; } else { $T = 'http:' . $T; } } if ( $CONFIG->{callback} != 0 ) { next if &{ $CONFIG->{callback} }( $T, $C ); } $T = uri_absolute( $T, $ST[4], $CONFIG->{normalize_uri} ); @v = uri_split($T); if ( ( defined $v[2] && $v[2] ne $ST[0] ) || ( $v[3] > 0 && $v[3] != $ST[1] ) ) { $C->{offsites}->{ uri_join(@v) }++ if ( $CONFIG->{save_offsites} > 0 ); next; } if ( $v[0] =~ /\.([a-z0-9]+)$/i ) { if ( defined $CONFIG->{skip_ext}->{ lc($1) } ) { $TRACK->{ $v[0] } = '?' if ( $CONFIG->{save_skipped} > 0 ); next; } } if ( defined $v[4] && $CONFIG->{use_params} > 0 ) { $TRACK->{ $v[0] } = '?' if ( $CONFIG->{params_double_record} > 0 && !defined $TRACK->{ $v[0] } ); $v[0] = $v[0] . '?' . $v[4]; } next if ( defined $TRACK->{ $v[0] } ) ; # we've processed this already push @$Q, [ $ST[0], $ST[1], $v[0], $ST[3] + 1, '', $ST[2] ]; } # foreach @$URLS = (); # reset for next round } # while return $COUNT; } # end sub crawl sub _crawl_extract_links_test { my ( $TAG, $hr, $dr, $start, $len, $OBJ ) = ( lc(shift), @_ ); return undef if ( !scalar %$hr ); # fastpath quickie my $t = $_crawl_linktags{$TAG}; while ( my ( $key, $val ) = each %$hr ) { # normalize element values $$hr{ lc($key) } = $val; } if ( $TAG eq 'meta' && defined $$hr{'http-equiv'} && $$hr{'http-equiv'} eq 'refresh' && defined $$hr{'content'} && $$hr{'content'} =~ m/url=(.+)/i ) { push( @{ $OBJ->{urls} }, $1 ); } elsif ( ref($t) ) { foreach (@$t) { push( @{ $OBJ->{urls} }, $$hr{$_} ) if ( defined $$hr{$_} ); } } else { push( @{ $OBJ->{urls} }, $$hr{$t} ) if ( defined $$hr{$t} ); } if ( $TAG eq 'form' && defined $$hr{action} ) { my $u = $OBJ->{response}->{whisker}->{uri}; $OBJ->{forms}->{ uri_absolute( $$hr{action}, $u, 1 ) }++; } return undef; } sub _crawl_do_request_ex { my ( $hrin, $hrout, $OBJ ) = @_; my $ret; $ret = http_do_request( $hrin, $hrout ); return ( 2, $ret ) if ( $ret == 2 ); # if there was connection error, do not continue if ( $ret == 0 ) { # successful request if ( $$hrout{whisker}->{code} < 308 && $$hrout{whisker}->{code} > 300 ) { if ( $OBJ->{config}->{follow_moves} > 0 ) { return ( 3, $ret ) if ( defined $$hrout{location} && !ref( $$hrout{location} ) ); } return ( 5, $ret ); # not avail } if ( $$hrout{whisker}->{code} == 200 ) { if ( defined $$hrout{'content-type'} && $$hrout{'content-type'} !~ /^text\/htm/i ) { return ( 4, $ret ); } } } return ( -1, $ret ); # fallthrough } sub _crawl_do_request { my ( $hrin, $hrout, $OBJ ) = @_; my ( $cret, $lwret ); if ( $OBJ->{config}->{do_head} && $$hrin{whisker}->{method} ne 'HEAD' ) { my $save = $$hrin{whisker}->{method}; $$hrin{whisker}->{method} = 'HEAD'; ( $cret, $lwret ) = _crawl_do_request_ex( $hrin, $hrout, $OBJ ); $$hrin{whisker}->{method} = $save; return $cret if ( $cret > 0 ); if ( $lwret == 0 ) { # successful request if ( $$hrout{whisker}->{code} == 501 ) { # HEAD not allowed $OBJ->{config}->{do_head} = 0; # no more HEAD requests } } } ( $cret, $lwret ) = _crawl_do_request_ex( $hrin, $hrout, $OBJ ); return $lwret if ( $cret < 0 ); return $cret; } } # CRAWL_CONTAINER sub dump { my %what = @_; my ( $final, $k, $v ) = (''); while ( ( $k, $v ) = each %what ) { return undef if ( ref($k) || !ref($v) ); $final .= "\$$k = " . _dump( 1, $v, 1 ); $final =~ s#,\n$##; $final .= ";\n"; } return $final; } sub dump_writefile { my $file = shift; my $output = &dump(@_); return 1 if ( !open( OUT, ">$file" ) || !defined $output ); binmode(OUT); print OUT $output; close(OUT); } sub _dump { # dereference and dump an element my ( $t, $ref, $depth ) = @_; my ( $out, $k, $v ) = (''); $depth ||= 1; return 'undef' if ( $depth > 128 ); if ( !defined $ref ) { return 'undef'; } elsif ( ref($ref) eq 'HASH' ) { $out .= "{\n"; while ( ( $k, $v ) = each %$ref ) { $out .= "\t" x $t; $out .= _dumpd($k) . ' => '; if ( ref($v) ) { $out .= _dump( $t + 1, $v, $depth + 1 ); } else { $out .= _dumpd($v); } $out .= ",\n" unless ( substr( $out, -2, 2 ) eq ",\n" ); } $out =~ s#,\n$#\n#; $out .= "\t" x ( $t - 1 ); $out .= "},\n"; } elsif ( ref($ref) eq 'ARRAY' ) { $out .= "["; if ( ~~@$ref ) { $out .= "\n"; foreach $v (@$ref) { $out .= "\t" x $t; if ( ref($v) ) { $out .= _dump( $t + 1, $v, $depth + 1 ); } else { $out .= _dumpd($v); } $out .= ",\n" unless ( substr( $out, -2, 2 ) eq ",\n" ); } $out =~ s#,\n$#\n#; $out .= "\t" x ( $t - 1 ); } $out .= "],\n"; } elsif ( ref($ref) eq 'SCALAR' ) { $out .= _dumpd($$ref); } elsif ( ref($ref) eq 'REF' ) { $out .= _dump( $t, $$ref, $depth + 1 ); } elsif ( ref($ref) ) { # unknown/unsupported ref $out .= "undef"; } else { # normal scalar $out .= _dumpd($ref); } return $out; } sub _dumpd { # escape a scalar string my $v = shift; return 'undef' if ( !defined $v ); return "''" if ( $v eq '' ); return "$v" if ( $v eq '0' || $v !~ tr/0-9//c && $v !~ m#^0+# ); if ( $v !~ tr/ !-~//c ) { $v =~ s/(['\\])/\\$1/g; return "'$v'"; } $v =~ s#\\#\\\\#g; $v =~ s#"#\\"#g; $v =~ s#\r#\\r#g; $v =~ s#\n#\\n#g; $v =~ s#\t#\\t#g; $v =~ s#\$#\\\$#g; $v =~ s#([^!-~ ])#sprintf('\\x%02x',ord($1))#eg; return "\"$v\""; } { # package variables my $MIMEBASE64_TRYLOADING = 1; sub encode_base64 { if ($MIMEBASE64_TRYLOADING) { eval "require MIME::Base64"; $MIMEBASE64_TRYLOADING = 0; } goto &MIME::Base64::encode_base64 if ($MIME::Base64::VERSION); my $res = ""; my $eol = $_[1]; $eol = "\n" unless defined $eol; pos( $_[0] ) = 0; while ( $_[0] =~ /(.{1,45})/gs ) { $res .= substr( pack( 'u', $1 ), 1 ); chop($res); } $res =~ tr|` -_|AA-Za-z0-9+/|; my $padding = ( 3 - length( $_[0] ) % 3 ) % 3; $res =~ s/.{$padding}$/'=' x $padding/e if $padding; if ( length $eol ) { $res =~ s/(.{1,76})/$1$eol/g; } $res; } sub decode_base64 { if ($MIMEBASE64_TRYLOADING) { eval "require MIME::Base64"; $MIMEBASE64_TRYLOADING = 0; } goto &MIME::Base64::decode_base64 if ($MIME::Base64::VERSION); my $str = shift; my $res = ""; $str =~ tr|A-Za-z0-9+=/||cd; $str =~ s/=+$//; # remove padding $str =~ tr|A-Za-z0-9+/| -_|; # convert to uuencoded format while ( $str =~ /(.{1,60})/gs ) { my $len = chr( 32 + length($1) * 3 / 4 ); # compute length byte $res .= unpack( "u", $len . $1 ); # uudecode } $res; } } # end package variables sub encode_uri_hex { # normal hex encoding my $str = shift; $str =~ s/([^\/])/sprintf("%%%02x",ord($1))/ge; return $str; } sub encode_uri_randomhex { # random normal hex encoding my @T = split( //, shift ); my $s; foreach (@T) { if (m#[;=:&@\?]#) { $s .= $_; next; } if ( ( rand() * 2 ) % 2 == 1 ) { $s .= sprintf( "%%%02x", ord($_) ); } else { $s .= $_; } } return $s; } sub encode_uri_randomcase { my ( $x, $uri ) = ( '', shift ); return $uri if ( $uri !~ tr/a-zA-Z// ); # fast-path my @T = split( //, $uri ); for ( $x = 0 ; $x < ( scalar @T ) ; $x++ ) { if ( ( rand() * 2 ) % 2 == 1 ) { $T[$x] =~ tr/A-Za-z/a-zA-Z/; } } return join( '', @T ); } sub encode_unicode { my ( $c, $r ) = ( '', '' ); foreach $c ( split( //, shift ) ) { $r .= pack( "v", ord($c) ); } return $r; } sub decode_unicode { my $str = $_[0]; return $str if ( $str !~ tr/!-~//c ); # fastpath my ( $lead, $count, $idx ); my $out = ''; my $len = length($str); my ( $ptr, $no, $nu ) = ( 0, 0, 0 ); while ( $ptr < $len ) { my $c = substr( $str, $ptr, 1 ); if ( ord($c) >= 0xc0 && ord($c) <= 0xfd ) { $count = 0; $c = ord($c) << 1; while ( ( $c & 0x80 ) == 0x80 ) { $c <<= 1; last if ( $count++ == 4 ); } $c = ( $c & 0xff ); for ( $idx = 1 ; $idx < $count ; $idx++ ) { my $o = ord( substr( $str, $ptr + $idx, 1 ) ); $no = 1 if ( $o != 0x80 ); $nu = 1 if ( $o < 0x80 || $o > 0xbf ); } my $o = ord( substr( $str, $ptr + $idx, 1 ) ); $nu = 1 if ( $o < 0x80 || $o > 0xbf ); if ($nu) { $out .= substr( $str, $ptr++, 1 ); } else { if ($no) { $out .= "\xff"; # generic replacement char } else { my $prior = ord( substr( $str, $ptr + $count - 1, 1 ) ) << 6; $out .= pack( "C", (( ord( substr( $str, $ptr + $count, 1 ) ) & 0x7f ) + $prior ) & 255 ); } $ptr += $count + 1; } $no = $nu = 0; } else { $out .= $c; $ptr++; } } return $out; } sub encode_anti_ids { my ( $rhin, $modes ) = ( shift, shift ); my ( @T, $x, $c, $s, $y ); my $ENCODED = 0; my $W = $$rhin{'whisker'}; return if ( !( defined $rhin && ref($rhin) ) ); $$rhin{'whisker'}->{'uri_orig'} = $$rhin{'whisker'}->{'uri'}; if ( $modes =~ /4/ ) { $s = ''; if ( $$W{'uri'} =~ m#^/# ) { $y = &utils_randstr; $s .= $y while ( length($s) < 512 ); $$W{'uri'} = "/$s/.." . $$W{'uri'}; } } if ( $modes =~ /7/ ) { $$W{'uri'} = encode_uri_randomcase( $$W{'uri'} ); } if ( $modes =~ /2/ ) { $$W{'uri'} =~ s#/#/./#g; } if ( $modes =~ /8/ ) { $$W{'uri'} =~ s#/#\\#g; $$W{'uri'} =~ s#^\\#/#; $$W{'uri'} =~ s#^([a-zA-Z0-9_]+):\\#$1://#; $$W{'uri'} =~ s#\\$#/#; } if ( $modes =~ /1/ ) { if ( $ENCODED == 0 ) { $$W{'uri'} = encode_uri_randomhex( $$W{'uri'} ); $ENCODED = 1; } } if ( $modes =~ /5/ ) { ( $s, $y ) = ( &utils_randstr, &utils_randstr ); $$W{'uri'} = "/$s.html%3F$y=/../$$W{'uri'}"; } if ( $modes =~ /3/ ) { $s = &utils_randstr; $$W{'uri'} = "/%20HTTP/1.1%0d%0aAccept%3a%20$s/../..$$W{'uri'}"; } if ( $modes =~ /6/ ) { $$W{'http_space1'} = "\t"; } } %_forms_ELEMENTS = ( 'form' => 1, 'input' => 1, 'textarea' => 1, 'button' => 1, 'select' => 1, 'option' => 1, '/select' => 1 ); sub forms_read { my $dr = shift; return undef if ( !ref($dr) || length($$dr) == 0 ); my $A = [ {}, [] ]; html_find_tags( $dr, \&_forms_parse_callback, 0, $A, \%_forms_ELEMENTS ); if ( scalar %{ $A->[0] } ) { push( @{ $A->[1] }, $A->[0] ); } return $A->[1]; } sub forms_write { my $hr = shift; return undef if ( !ref($hr) || !( scalar %$hr ) ); return undef if ( !defined $$hr{"\0"} ); my $t = '
[0] . '" method="'; $t .= $$hr{"\0"}->[1] . '" action="' . $$hr{"\0"}->[2] . '"'; if ( defined $$hr{"\0"}->[3] ) { $t .= ' ' . join( ' ', @{ $$hr{"\0"}->[3] } ); } $t .= ">\n"; my ( $name, $ar ); while ( ( $name, $ar ) = each(%$hr) ) { next if ( $name eq "\0" ); next if ( $name eq '' && $ar->[0]->[0] eq '' ); foreach $a (@$ar) { my $P = ''; $P = ' ' . join( ' ', @{ $$a[2] } ) if ( defined $$a[2] ); $t .= "\t"; if ( $$a[0] eq 'textarea' ) { $t .= "\n"; } elsif ( $$a[0] =~ m/^input-(.+)$/ ) { $t .= "\n"; } elsif ( $$a[0] eq 'option' ) { $t .= "\t