From: b Date: Tue, 24 Nov 2015 21:50:06 +0000 (+0000) Subject: use strict X-Git-Url: http://bicyclesonthemoon.info/git-projects/?a=commitdiff_plain;h=b21c444f8f0f5fa06c93ce84a50bff9c855ee564;p=yplom%2Fproxy use strict Some functions moved to a module (library). git-svn-id: svn://botcastle1b/yplom/proxy@11 05ce6ebb-7522-4a6e-a768-0026ae12be9f --- diff --git a/access.1.pl b/access.1.pl index 4cd3e2f..77cc729 100644 --- a/access.1.pl +++ b/access.1.pl @@ -25,82 +25,24 @@ # http://wiki.squid-cache.org/Features/AddonHelpers#Access_Control_.28ACL.29 # http://perldoc.perl.org/perlsec.html#Taint-mode -###ACCESS_PATH; -###TIMEOUT_UNLOCK; -###TIMEOUT_INACT; +use strict; +###LIB; +use proxy_lib qw(access); $|=1; -$timeout_unlock = TIMEOUT_UNLOCK*60; -$timeout_inact = TIMEOUT_INACT*60; while () { - $line=$_; + my $line=$_; if ($line =~ s/^([0-9]+ )//) { print $1; } - $accesstime = time(); + my $accesstime = time(); # The input line is expected to have the IP. if ($line =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)( .*)?$/) { - $IP = $1; - - # If access file doesn't exist the proxy is not unlocked for this IP. - $accesspath=ACCESS_PATH.$IP; - if (! (-e $accesspath)) { - print "ERR\n"; - } - elsif (open ($accessfile,"+<",$accesspath)) { - # The access file has to be locked otherwise one process may read it while - # another one updates it. - unless (flock ($accessfile, 2)) { - close ($accessfile); - print "ERR\n"; - next; - } - $unlocktime=<$accessfile>; - $lasttime=<$accessfile>; - - # First line is the timestamp of proxy unlocking. Second line is the - # timestamp of last access. - $unlocktime =~ s/[\r\n]//g; - $lasttime =~ s/[\r\n]//g; - - # The timestamps must be numbers. - if ($unlocktime =~ /^([0-9]+)$/) { - $unlocktime=int($1); - } - else { - close ($accessfile); - print "ERR\n"; - next; - } - - if ($lasttime =~ /^([0-9]+)$/) { - $lasttime=int($1); - } - else { - close ($accessfile); - print "ERR\n"; - next; - } - - if ((abs($accesstime-$unlocktime)>$timeout_unlock) or (abs($accesstime-$lasttime)>$timeout_inact)){ - # If too much time passed proxy is not unlocked any more. Also the - # access file is now useless and will be removed. - close ($accessfile); - unlink $accesspath; - print "ERR\n"; - } - else { - # the proxy is unlocked. The access file will now be updated. - if (seek($accessfile, 0, 0)) { - print $accessfile "$unlocktime\n$accesstime\n"; - truncate ($accessfile , tell($accessfile)); - } - close ($accessfile); - print "OK\n"; - } + if (access($accesstime, $1, '')) { + print "OK\n"; } else { print "ERR\n"; diff --git a/configure.pl b/configure.pl index a0900bc..0b08d37 100755 --- a/configure.pl +++ b/configure.pl @@ -55,6 +55,8 @@ $def{'LOG_SIZE_LIMIT'} = "use constant LOG_SIZE_LIMIT => ".$set{'log_size $def{'LOGS_UNCOMPRESSED'} = "use constant LOGS_UNCOMPRESSED => ".$set{'logs_uncompressed'}.";"; $def{'LOGS_TOTAL'} = "use constant LOGS_TOTAL => ".$set{'logs_total'}.";"; $def{'REWRITE_URL'} = "use constant REWRITE_URL => '".$set{'https_proxy_domain'}.":".$set{'https_proxy_port'}."';"; +$def{'LIB'} = "use lib '".$set{'lib_path'}."';"; + $def{'PATH'} = "\$ENV{'PATH'} = '".$set{'path'}."';"; @@ -98,6 +100,7 @@ $def{'MV'} = 'MV='.$set{'mv'}; $def{'CP'} = 'CM='.$set{'cp'}; $def{'RM'} = 'RM='.$set{'rm'}; $def{'OD'} = 'OD='.$set{'bin_path'}; +$def{'LD'} = 'LD='.$set{'lib_path'}; $def{'CM'} = 'CM='.$set{'chmod'}; diff --git a/makefile.1.mak b/makefile.1.mak index fe4e8cc..ca6fe27 100644 --- a/makefile.1.mak +++ b/makefile.1.mak @@ -8,8 +8,9 @@ ###RM; ###CM; ###OD; +###LD; -all: moveout copyout remove config.txt +all: moveout copyout moveoutlib remove config.txt moveout: proxy proxy.pl cleararch oldlogs rewrite access access.pl setuid exec @@ -18,6 +19,9 @@ moveout: proxy proxy.pl cleararch oldlogs rewrite access access.pl setuid exec copyout: setuid exec # $(CP) access.pl $(OD) +moveoutlib: proxy_lib.pm setuid exec + $(MV) proxy_lib.pm $(LD) + setuid: proxy access $(CM) u+s proxy access @@ -32,6 +36,9 @@ remove: proxy proxy.c access access.c copyout moveout setuid exec proxy.pl: proxy.1.pl configure.pl settings $(PL) configure.pl settings proxy.pl +proxy_lib.pm: proxy_lib.1.pm configure.pl settings + $(PL) configure.pl settings proxy_lib.pm + proxy.c: proxy.1.c configure.pl settings $(PL) configure.pl settings proxy.c diff --git a/proxy.1.pl b/proxy.1.pl index 114c55b..0de727f 100755 --- a/proxy.1.pl +++ b/proxy.1.pl @@ -32,7 +32,11 @@ # https://tools.ietf.org/html/rfc3875 # https://tools.ietf.org/html/rfc2396 +use strict; +#use warnings; +###LIB; +use proxy_lib qw(urldecode divideurl urldiv2path getcgi formatheader access); use POSIX qw(strftime); ###UNLOCK_LOG; @@ -53,9 +57,16 @@ use POSIX qw(strftime); use constant REQUEST_HEADER_BLOCK => qr/^(accept-encoding|connection|te|transfer-encoding|via)$/; use constant RESPONSE_HEADER_BLOCK => qr/^(connection|content-encoding|content-length|trailer|transfer-encoding)$/; -$accesstime = time(); -$timeout_unlock = TIMEOUT_UNLOCK*60; -$timeout_inact = TIMEOUT_INACT*60; + +my $accesstime = time(); + +my $URL=''; +my $IP; +my $prot=''; +my $host=''; +my $port=''; +my $path=''; +my $query=''; delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; ###PATH; @@ -66,7 +77,7 @@ if (($ENV{'HTTP_HOST'} =~ UNLOCK_PROXY_HOST) and (urldecode($ENV{'PATH_INFO'}) = } else { # proxy will work only if unlocked - if(access()) { + if(access($accesstime,$ENV{'REMOTE_ADDR'},$ENV{'HTTP_HOST'})) { proxy(); } else { @@ -80,7 +91,49 @@ sub proxy { # The HTTPS variable is not defined in rfc3875 (might be a portability # problem?) - $archive = 0; # if this http request should be archived + my $archive=0; # if this http request should be archived + my $postdata=0; # if there is data to pe posted + my $archpath=0; + + my $respheadpath=''; + my $respcontpath=''; + my $postheadpath=''; + my $postcontpath=''; + + my $respheadfile; + my $respcontfile; + my $postheadfile; + my $postcontfile; + + my $postcontopen=0; + my $respcontopen=0; + my $respheadopen=0; + + my $safelength=1; # if Content-length is safe to be forwarded + my $definedlength=0; # if Content-length is defined + my $safeerror=1; # if it's safe to send an error message. + my $lastskipped=1; # if the previous header field was not forwarded + my $forceexit=0; # if the proxy should exit with error + my $errormsg; + + my $contentlength; + + my @curl_arg=(CURL_PATH,'-i','-#'); # the command line arguments for curl + my @reqhead; # the HTTP request header + my @resphead; + + #pipes + my $curloutrd; + my $curloutwr; + my $curlinrd; + my $curlinwr; + my $curlerrrd; + my $curlerrwr; + + my $curlpid; + + my $buffer; + my $buffer2; # The proxy will not support request methods other than GET, POST, HEAD. # There is no need for it. @@ -101,7 +154,7 @@ sub proxy { # The URL will now be validated. # Additionally it will be divided in parts which will be stored in variables: # $prot (protocol), $host, $port, $path, $query. - if (validateurl($URL)) { + if (($prot, $host, $port, $path, $query)=divideurl($URL)) { $URL =~ /^(.*)$/; $URL = $1; } @@ -143,11 +196,14 @@ sub proxy { return fail("Status: 403 Forbidden\n","403 Forbidden","The proxy does not accept port number $port because of infinite loop prevention."); } - @curl_arg=(CURL_PATH,'-i','-#'); # the command line arguments for curl - @reqhead; # the HTTP request header - foreach $envk (keys %ENV) { + + foreach my $envk (keys %ENV) { # The relevant http variables either start with HTTP_ or CONTENT_. + my $headarg=''; + my $headval=''; + my $headname=''; + if ($envk =~ /^(HTTP_[A-Z0-9_]+)$/) { $headname=formatheader($1); } @@ -202,7 +258,7 @@ sub proxy { # The path for archiving the HTTP request is made, the required directories # are created. # If this fails the request will not be archived but will still be proxied. - $archpath = makepath($prot, $host, $port, $path, $query); + $archpath = urldiv2path($prot, $host, $port, $path, $query, 1); if ($archpath) { $archive = 1; if ($postdata) { @@ -222,10 +278,10 @@ sub proxy { # The request header is written. if (open ($postheadfile, ">", $postheadpath)) { - foreach $line (@reqhead) { + foreach my $line (@reqhead) { print $postheadfile "$line\n"; } - print $postheadfile "$line\n"; + print $postheadfile "\n"; close ($postheadfile); } @@ -291,8 +347,6 @@ sub proxy { binmode($curlinwr) or die "Failed to switch stdin pipe to binary mode.\n"; binmode(STDIN) or die "Failed to switch STDIN to binary mode.\n"; - $buffer2; - $postcontopen = open($postcontfile, ">", $postcontpath); if ($postcontopen){ unless (binmode($postcontfile)) { @@ -320,17 +374,11 @@ sub proxy { # close ($curlinrd); } - $safelength=1; # if Content-length is safe to be forwarded - $definedlength=0; # if Content-length is defined - $safeerror=1; # if it's safe to send an error message. - $lastskipped=1; # if the previous header field was not forwarded - $forceexit=0; # if the proxy should exit with error - - if ($line = <$curloutrd>) { + if (defined (my $line = <$curloutrd>)) { $line =~ s/\r?\n$//; # The first line is the status. if ($line =~ /^HTTP\/[0-9]+\.[0-9]+ ([0-9]+ .*)$/) { - $status =$1; + my $status =$1; @resphead=('Status: '.$status); # If the status is not 2XX and this URL was already archived before it @@ -344,8 +392,10 @@ sub proxy { } # now read the header lines - while ($line = <$curloutrd>) { + while (defined($line = <$curloutrd>)) { $line =~ s/[\r\n]$//g; + my $headname=''; + my $headval=''; # empty line (double \n) meane the end of the header. if($line eq ''){ @@ -368,7 +418,7 @@ sub proxy { if ($line =~ /^([^:]*):(.*)$/) { $headname=lc($1); - $headvalue=$2; + $headval=$2; } else { $lastskipped=1; @@ -382,15 +432,15 @@ sub proxy { $safelength = 0; } if($headname eq 'content-encoding') { - if($headvalue !~ /^[\r\n \t]*identity[\r\n \t]*$/) { + if($headval !~ /^[\r\n \t]*identity[\r\n \t]*$/) { $safelength = 0; } } if($headname eq 'content-length') { $definedlength=1; - $headvalue =~ s/^[\r\n \t]*//; - $headvalue =~ s/[\r\n \t]*$//; - $contentlength = int($headvalue); + $headval =~ s/^[\r\n \t]*//; + $headval =~ s/[\r\n \t]*$//; + $contentlength = int($headval); } # Some header fields should not be forwarded. @@ -398,13 +448,12 @@ sub proxy { $lastskipped =1; next; } - $line; $lastskipped=0; push @resphead, $line; } # Only add the Content-length header if it's safe to do so. - if ($lengthdefined and ($safelength or ($contentlength==0))) { + if ($definedlength and ($safelength or ($contentlength==0))) { push @resphead, 'Content-length: '.$contentlength; } @@ -438,7 +487,7 @@ sub proxy { } else { # Create the file only if there was content received. No 0-byte files. - $firstline=1; + my $firstline=1; while (read ($curloutrd,$buffer,1024)) { unless (print (STDOUT $buffer)) { $forceexit=1; @@ -476,7 +525,7 @@ sub proxy { # Generate error message if possible. if ($safeerror) { $errormsg=''; - while ($line = <$curlerrrd>) { + while (defined(my $line = <$curlerrrd>)) { $line =~ s/\r?\n/
/g; $errormsg=$errormsg.$line; } @@ -505,88 +554,16 @@ sub proxy { } -# This function checks if the user has unlocked the proxy. Nonzero means yes. -sub access { - if($ENV{'HTTP_HOST'} =~ UNLOCK_PROXY_HOST){ - return 1; - } - - # Check if IP - if ($ENV{'REMOTE_ADDR'} =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/) { - $IP = $1; - - # 127.0.0.1 is always allowed. All https requests are tunnelled through - # 127.0.0.1. - if ($IP eq '127.0.0.1') { - return 1; - } - - # If access file doesn't exist the proxy is not unlocked for this IP. - $accesspath=ACCESS_PATH.$IP; - if (! (-e $accesspath)) { - return 0; - } - - elsif (open ($accessfile,"+<",$accesspath)) { - # The access file has to be locked otherwise one process may read it while - # another one updates it. - unless (flock ($accessfile, 2)) { - close ($accessfile); - return 0; - } - # First line is the timestamp of proxy unlocking. Second line is the - # timestamp of last access. - $unlocktime=<$accessfile>; - $lasttime=<$accessfile>; - - $unlocktime =~ s/[\r\n]//g; - $lasttime =~ s/[\r\n]//g; - - # The timestamps must be numbers. - if ($unlocktime =~ /^([0-9]+)$/) { - $unlocktime=int($1); - } - else { - close ($accessfile); - return 0; - } - - if ($lasttime =~ /^([0-9]+)$/) { - $lasttime=int($1); - } - else { - close ($accessfile); - return 0; - } - - if ((abs($accesstime-$unlocktime)>$timeout_unlock) or (abs($accesstime-$lasttime)>$timeout_inact)){ - # If too much time passed proxy is not unlocked any more. Also the - # access file is now useless and will be removed. - close ($accessfile); - unlink $accesspath; - return 0; - } - else { - # the proxy is unlocked. The access file will now be updated. - if (seek($accessfile, 0, 0)) { - print $accessfile "$unlocktime\n$accesstime\n"; - truncate ($accessfile , tell($accessfile)); - } - close ($accessfile); - return 1; - } - } - else { - return 0; - } - } - else { - return 0; - } -} - # Function to unlock the proxy. sub unlock { + my %CGI; + my $passpath; + my $passfile; + my $logfile; + my $pass; + my $accessfile; + my $accesspath; + if ($ENV{'REMOTE_ADDR'} =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/) { $IP=$1; } @@ -661,44 +638,6 @@ sub unlock { } -# The function to get CGI parameters from string. -# Format is: name=url_encoded_value&name=url_encoded_value& ... &name=url_encoded_value -sub getcgi { - my $arg; - my $varl; - my %cgi; - my $i = $_[0]; - $i =~ s/[\r\n]//g; - my @s = split('&',$i); - foreach my $l ( @s) { - ($arg,$val)=split('=',$l); - $cgi{$arg}=urldecode($val); - } - return %cgi; -} - -# Function for decoding URL-encoded text -sub urldecode { - my $t = $_[0]; - $t =~ s/\+/ /g; - $t =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/eg; - return $t; -} - -# Function for formatting header field names. -# The Environment variable name is like this: HTTP_HEADER_NAME or HEADER_NAME. -# But should be Header-Name. -# Actually the names are case-insensitive but in practise they often look like -# this. (Otherwise suspicious?) -sub formatheader { - my $t = $_[0]; - $t =~ s/^HTTP_//; - $t = lc($t); - $t =~ s/^([a-z])/uc($1)/e; - $t =~ s/_([a-z])/'-'.uc($1)/eg; - return $t; -} - # Function for showing the proxy unlock form page. # arguments: 1 - additional message (optional), 2 - additional header fields # (optional) @@ -790,282 +729,3 @@ sub noaccess { print ''.UNLOCK_PROXY_URL.''; print "\n"; } - -# Function to show debug information. Not used any more. -sub debag { - print "Content-type: text/plain\n\n"; - foreach $envk (keys %ENV) { - print "$envk = $ENV{$envk}\n"; - } - print "\n"; - - foreach $line ( ) { - print $line; - } - print "\n\n"; - if ($ENV{'REQUEST_URI'} =~ /^[a-z]*(:[0-9]*)?:\/\//){ - #if - $URL=$ENV{'REQUEST_URI'} - } - else{ - $URL=($ENV{'HTTPS'} eq 'on' ? 'https://' : 'http://').$ENV{'HTTP_HOST'}.$ENV{'REQUEST_URI'}; - } - print 'URL: ',$URL,"\n"; -} - -# Function to check URL and divide in parts: protocol, hostname, port number, -# path, query string. -sub validateurl { - my $url = $_[0]; - $prot; - my $hostportpathquery; - my $hostportpath; - $query; - my $hostport; - $path; - my $part; - $host; - $port; - - if ($url =~ /^([A-Za-z]+):\/\/(.*)/) { - $prot = $1; - $hostportpathquery = $2; - } - else { - return 0; - } - - if ($prot !~ /^https?$/) { - return 0; - } - - if ($hostportpathquery eq ''){ - return 0; - } - - if ($hostportpathquery =~ /^([^?]+)\?(.*)$/) { - $hostportpath = $1; - $query = $2; - - if ($query !~ /^((%[0-9A-Fa-f][0-9A-Fa-f])|([A-Za-z0-9;\/\?:@&=\+\$\,\-_\.~\*'\(\)]))*$/) { - return 0; - } - } - else { - $hostportpath = $hostportpathquery; - $query = ''; - } - - if($hostportpath =~ /^([^\/]+)(\/.*)$/){ - $hostport = $1; - $path = $2; - - if ($path !~ /^((%[0-9A-Fa-f][0-9A-Fa-f])|([A-Za-z0-9:;@&=\+\$\,-_\.~\*'\(\)]))*$/) { - return 0; - } - $path=urldecode($path); - } - else { - $hostport = $hostportpath; - $path = ''; - } - - if ($hostport =~ /^(.*):([0-9]+)$/) { - $host = $1; - $port = $2; - } - else { - $host=$hostport; - $port=''; - } - - if ($host !~ /^[A-Za-z0-9\-\.]+$/) { - return 0; - } - - return 1; -} - -# Function to convert URL to archive path. Also creates required directories. -# Returns the path on success and empty string on fail. -# Inspired by the MIRA browser. -sub makepath { - (my $prot, my $host, my $port, my $path, my $query) = @_; - - my $archpath = ARCH_PATH; - - # First subdir: protocol @p port_number - if ($prot =~ /^(https?)$/) { - $archpath .= $1; - } - else { - return ''; - } - - if ($port =~ /^([0-9]+)$/) { - $archpath .= "\@p$1"; - } - - unless (-d $archpath) - { - unless (mkdir $archpath) { - return ''; - } - } - - # Host name: each segment between dots is a subdir, revedrsed order, last part - # ends with @n. - # For example www.example.com becomes - # ... /com/example/www@/ ... - - # When segment is longer than 120 characters it's divided into 64 character - # parts ending with '-' until the last one is not longer than 120. - # For example - # www.aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz.com - # becomes - # ... /com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggg-/gggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmm-/mmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/www@n/ ... - - if($host =~ /^([A-Za-z0-9\-\.]+)$/) { - $host = $1; - } - else { - return ''; - } - - while ((my $ind = rindex ($host, '.'))>=0) { - my $part= substr $host, $ind+1; - $host = substr $host, 0, $ind; - - while (length ($part) > 120) { - $archpath .= '/'.substr($part,0,64).'-'; - $part = substr($part,64); - unless (-d $archpath) - { - unless (mkdir $archpath) { - return ''; - } - } - } - $archpath .= '/'.$part; - unless (-d $archpath) - { - unless (mkdir $archpath) { - return ''; - } - } - } - while (length ($host) > 120) { - $archpath .= '/'.substr($host,0,64).'-'; - $host = substr($host,64); - unless (-d $archpath) - { - unless (mkdir $archpath) { - return ''; - } - } - } - $archpath .= '/'.$host.'@n'; - unless (-d $archpath) - { - unless (mkdir $archpath) { - return ''; - } - } - - # Path: each segment between '/' is subdir. The segment after the last '/' is - # not a subdir - it is part of the file name. And it will be appended with - # some characters outside of this function: @g, @h, @u or @v. - # Exception: when there is a query string the last segment is a subdir too and - # ends with q. - # Characters that are not letters or numbers or '_' or '.' are encoded. Like - # in URL-encoding but with '@' instead of '%'. - # When segment is longer than 120 characters it's divided into 64 character - # parts ending with '-' until the last one is not longer than 120. - - # For example: - # /some/path/?a=1 becomes ... /some/path@q/ ... - # /some/other-path becomes ... /some/other@2Dpath - # /path/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/yyy - # becomes - # ... /path/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggg-/gggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmm-/mmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/yyy - - $path =~ s/^\///; - - while ((my $ind = index ($path, '/'))>=0) { - my $part = substr $path, 0, $ind; - $path= substr $path, $ind+1; - - $part =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg; - - while (length ($part) > 120) { - $archpath .= '/'.substr($part,0,64).'-'; - $part = substr($part,64); - unless (-d $archpath) - { - unless (mkdir $archpath) { - return ''; - } - } - } - $archpath .= '/'.$part; - unless (-d $archpath) - { - unless (mkdir $archpath) { - return ''; - } - } - } - $path =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg; - while (length ($path) > 120) { - $archpath .= '/'.substr($path,0,64).'-'; - $path = substr($path,64); - unless (-d $archpath) - { - unless (mkdir $archpath) { - return ''; - } - } - } - - if ($query) { - # query string: The last part is not a subdir - it is part of the file name. - # And it will be appended with some characters outside of this function: @g, - # @h, @u or @v. - # When Query string is longer than 240 characters it's divided into 128 - # character parts ending with '-' until the last one is not longer than 240. - # Characters that are not letters or numbers or '_' or '.' are encoded. Like - # in URL-encoding but with '@' instead of '%'. - # When segment is longer than 120 characters it's divided into 64 character - # parts ending with '-' until the last one is not longer than 120. - # The '?' at the beginning is not part of the query string. - # For example: - # ?a=1&b=%25%5E%26 becomes ... /a@3D1@26b@3D@2525@255E@2526 - # ?a=aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz&b=aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz - # becomes - # ... /a@3Daaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmm-/mmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz@26@b@3Daaaaaaaaaabbbb-/bbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz - $query =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg; - - $archpath .= '/'.$path.'@q'; - - $path = $query; - unless (-d $archpath) - { - unless (mkdir $archpath) { - return ''; - } - } - - while (length ($path) > 240) { - $archpath .= '/'.substr($path,0,128).'-'; - $path = substr($path,128); - unless (-d $archpath) - { - unless (mkdir $archpath) { - return ''; - } - } - } - } - $archpath .= '/'.$path; - return $archpath; -} diff --git a/proxy_lib.1.pm b/proxy_lib.1.pm new file mode 100644 index 0000000..f3bf29a --- /dev/null +++ b/proxy_lib.1.pm @@ -0,0 +1,537 @@ +# proxy_lib.pm is generated from proxy_lib.1.pm +# +# Some of the functions would be used by multiple bots for different platforms +# using the proxy archive. + +package proxy_lib; + +use strict; +#use warnings +use Exporter; +use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); + +###ARCH_PATH; +###ACCESS_PATH; +###TIMEOUT_UNLOCK; +###TIMEOUT_INACT; +###UNLOCK_PROXY_HOST; + +$VERSION = 0.000002; +@ISA = qw(Exporter); +@EXPORT = (); +@EXPORT_OK = qw(access divideurl formatheader getcgi joinurl path2url url2path path2urldiv urldecode urldiv2path); +%EXPORT_TAGS = (); + +# This function checks if the user has unlocked the proxy. Nonzero means yes. +sub access { + (my $time, my $ip, my $host) = @_; + my $timeout_unlock = TIMEOUT_UNLOCK*60; + my $timeout_inact = TIMEOUT_INACT*60; + my $accesspath=''; + my $accessfile; + my $lasttime; + my $unlocktime; + + if($host =~ UNLOCK_PROXY_HOST){ + return 1; + } + + # Check if IP + if ($ip =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/) { + $ip = $1; + + # 127.0.0.1 is always allowed. All https requests are tunnelled through + # 127.0.0.1. + if ($ip eq '127.0.0.1') { + return 1; + } + + # If access file doesn't exist the proxy is not unlocked for this IP. + $accesspath=ACCESS_PATH.$ip; + if (! (-e $accesspath)) { + return 0; + } + + elsif (open ($accessfile,"+<",$accesspath)) { + # The access file has to be locked otherwise one process may read it while + # another one updates it. + unless (flock ($accessfile, 2)) { + close ($accessfile); + return 0; + } + # First line is the timestamp of proxy unlocking. Second line is the + # timestamp of last access. + $unlocktime=<$accessfile>; + $lasttime=<$accessfile>; + + $unlocktime =~ s/[\r\n]//g; + $lasttime =~ s/[\r\n]//g; + + # The timestamps must be numbers. + if ($unlocktime =~ /^([0-9]+)$/) { + $unlocktime=int($1); + } + else { + close ($accessfile); + return 0; + } + + if ($lasttime =~ /^([0-9]+)$/) { + $lasttime=int($1); + } + else { + close ($accessfile); + return 0; + } + + if ((abs($time-$unlocktime)>$timeout_unlock) or (abs($time-$lasttime)>$timeout_inact)){ + # If too much time passed proxy is not unlocked any more. Also the + # access file is now useless and will be removed. + close ($accessfile); + unlink $accesspath; + return 0; + } + else { + # the proxy is unlocked. The access file will now be updated. + if (seek($accessfile, 0, 0)) { + print $accessfile "$unlocktime\n$time\n"; + truncate ($accessfile , tell($accessfile)); + } + close ($accessfile); + return 1; + } + } + else { + return 0; + } + } + else { + return 0; + } +} + + +# Function to check URL and divide in parts: protocol, hostname, port number, +# path, query string. +sub divideurl { + my $url = $_[0]; + my $prot; + my $hostportpathquery; + my $hostportpath; + my $query; + my $hostport; + my $path; + my $part; + my $host; + my $port; + + if ($url =~ /^([A-Za-z]+):\/\/(.*)/) { + $prot = $1; + $hostportpathquery = $2; + } + else { + return (); + } + + if ($prot !~ /^https?$/) { + return (); + } + + if ($hostportpathquery eq ''){ + return (); + } + + if ($hostportpathquery =~ /^([^?]+)\?(.*)$/) { + $hostportpath = $1; + $query = $2; + + if ($query !~ /^((%[0-9A-Fa-f][0-9A-Fa-f])|([A-Za-z0-9;\/\?:@&=\+\$\,\-_\.~\*'\(\)]))*$/) { + return (); + } + } + else { + $hostportpath = $hostportpathquery; + $query = ''; + } + + if($hostportpath =~ /^([^\/]+)(\/.*)$/){ + $hostport = $1; + $path = $2; + + if ($path !~ /^((%[0-9A-Fa-f][0-9A-Fa-f])|([A-Za-z0-9:;@&=\+\$\,-_\.~\*'\(\)]))*$/) { + return (); + } + $path=urldecode($path); + } + else { + $hostport = $hostportpath; + $path = ''; + } + + if ($hostport =~ /^(.*):([0-9]+)$/) { + $host = $1; + $port = $2; + } + else { + $host=$hostport; + $port=''; + } + + if ($host !~ /^[A-Za-z0-9\-\.]+$/) { + return (); + } + + return ($prot, $host, $port, $path, $query); +} + +# Function for decoding URL-encoded text +sub urldecode { + my $t = $_[0]; + $t =~ s/\+/ /g; + $t =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/eg; + return $t; +} + +# The function to get CGI parameters from string. +# Format is: name=url_encoded_value&name=url_encoded_value& ... &name=url_encoded_value +sub getcgi { + my $arg; + my $val; + my %cgi; + my $i = $_[0]; + $i =~ s/[\r\n]//g; + my @s = split('&',$i); + foreach my $l ( @s) { + ($arg,$val)=split('=',$l); + $cgi{$arg}=urldecode($val); + } + return %cgi; +} + +# Function for formatting header field names. +# The Environment variable name is like this: HTTP_HEADER_NAME or HEADER_NAME. +# But should be Header-Name. +# Actually the names are case-insensitive but in practise they often look like +# this. (Otherwise suspicious?) +sub formatheader { + my $t = $_[0]; + $t =~ s/^HTTP_//; + $t = lc($t); + $t =~ s/^([a-z])/uc($1)/e; + $t =~ s/_([a-z])/'-'.uc($1)/eg; + return $t; +} + +# Function to convert URL to archive path. Also creates required directories +# if $mkdir true. +# Returns the path on success and undef on fail. +# Inspired by the MIRA browser. +sub urldiv2path { + (my $prot, my $host, my $port, my $path, my $query, my $mkdir) = @_; + + my $archpath = ARCH_PATH; + + # First subdir: protocol @p port_number + if ($prot =~ /^(https?)$/) { + $archpath .= $1; + } + else { + return undef; + } + + unless ($port){ + $port=($prot eq 'https')?'443':'80'; + } + if ($port =~ /^([0-9]+)$/) { + $archpath .= "\@p$1"; + } + else { + return undef; + } + + if ($mkdir and !(-d $archpath)) + { + unless (mkdir $archpath) { + return undef; + } + } + + # Host name: each segment between dots is a subdir, revedrsed order, last part + # ends with @n. + # For example www.example.com becomes + # ... /com/example/www@/ ... + + # When segment is longer than 120 characters it's divided into 64 character + # parts ending with '-' until the last one is not longer than 120. + # For example + # www.aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz.com + # becomes + # ... /com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggg-/gggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmm-/mmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/www@n/ ... + + if($host =~ /^([A-Za-z0-9\-\.]+)$/) { + $host = $1; + } + else { + return undef; + } + + while ((my $ind = rindex ($host, '.'))>=0) { + my $part= substr $host, $ind+1; + $host = substr $host, 0, $ind; + + while (length ($part) > 120) { + $archpath .= '/'.substr($part,0,64).'-'; + $part = substr($part,64); + if ($mkdir and !(-d $archpath)) + { + unless (mkdir $archpath) { + return undef; + } + } + } + $archpath .= '/'.$part; + if ($mkdir and !(-d $archpath)) + { + unless (mkdir $archpath) { + return undef; + } + } + } + while (length ($host) > 120) { + $archpath .= '/'.substr($host,0,64).'-'; + $host = substr($host,64); + if ($mkdir and !(-d $archpath)) + { + unless (mkdir $archpath) { + return undef; + } + } + } + $archpath .= '/'.$host.'@n'; + if ($mkdir and !(-d $archpath)) + { + unless (mkdir $archpath) { + return undef; + } + } + + # Path: each segment between '/' is subdir. The segment after the last '/' is + # not a subdir - it is part of the file name. And it will be appended with + # some characters outside of this function: @g, @h, @u or @v. + # Exception: when there is a query string the last segment is a subdir too and + # ends with q. + # Characters that are not letters or numbers or '_' or '.' are encoded. Like + # in URL-encoding but with '@' instead of '%'. + # When segment is longer than 120 characters it's divided into 64 character + # parts ending with '-' until the last one is not longer than 120. + + # For example: + # /some/path/?a=1 becomes ... /some/path@q/ ... + # /some/other-path becomes ... /some/other@2Dpath + # /path/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/yyy + # becomes + # ... /path/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggg-/gggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmm-/mmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/yyy + + $path =~ s/^\///; + + while ((my $ind = index ($path, '/'))>=0) { + my $part = substr $path, 0, $ind; + $path= substr $path, $ind+1; + + $part =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg; + + while (length ($part) > 120) { + $archpath .= '/'.substr($part,0,64).'-'; + $part = substr($part,64); + if ($mkdir and !(-d $archpath)) + { + unless (mkdir $archpath) { + return undef; + } + } + } + $archpath .= '/'.$part; + if ($mkdir and !(-d $archpath)) + { + unless (mkdir $archpath) { + return undef; + } + } + } + $path =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg; + while (length ($path) > 120) { + $archpath .= '/'.substr($path,0,64).'-'; + $path = substr($path,64); + if ($mkdir and !(-d $archpath)) + { + unless (mkdir $archpath) { + return undef; + } + } + } + + if ($query) { + # query string: The last part is not a subdir - it is part of the file name. + # And it will be appended with some characters outside of this function: @g, + # @h, @u or @v. + # When Query string is longer than 240 characters it's divided into 128 + # character parts ending with '-' until the last one is not longer than 240. + # Characters that are not letters or numbers or '_' or '.' are encoded. Like + # in URL-encoding but with '@' instead of '%'. + # When segment is longer than 120 characters it's divided into 64 character + # parts ending with '-' until the last one is not longer than 120. + # The '?' at the beginning is not part of the query string. + # For example: + # ?a=1&b=%25%5E%26 becomes ... /a@3D1@26b@3D@2525@255E@2526 + # ?a=aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz&b=aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz + # becomes + # ... /a@3Daaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmm-/mmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz@26@b@3Daaaaaaaaaabbbb-/bbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz + $query =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg; + + $archpath .= '/'.$path.'@q'; + + $path = $query; + if ($mkdir and !(-d $archpath)) + { + unless (mkdir $archpath) { + return undef; + } + } + + while (length ($path) > 240) { + $archpath .= '/'.substr($path,0,128).'-'; + $path = substr($path,128); + if ($mkdir and !(-d $archpath)) + { + unless (mkdir $archpath) { + return undef; + } + } + } + } + $archpath .= '/'.$path; + return $archpath; +} + +sub url2path { + return urldiv2path(divideurl($_[0])); +} + +sub path2urldiv { + my $archpath = $_[0]; + my $prot; + my $host; + my $port; + my $path; + my $query; + + if (index($archpath, ARCH_PATH) != 0) { + return (); + } + $archpath = substr ($archpath, length(ARCH_PATH)); + + $archpath =~ s/-\///g; + + if ($archpath =~/^((https?)\@p([0-9]+)\/)/) { + $archpath = substr($archpath, length($1)); + $prot = $2; + $port = $3; + } + else { + return (); + } + + if ($archpath =~ /^(([A-Za-z0-9\-\/]+)\@n)\//) { + $archpath = substr($archpath, length($1)); + my $host2 = $2; + + while ((my $ind = rindex ($host2, '/'))>=0) { + $host .= (substr $host2, $ind+1).'.'; + $host2 = substr $host2, 0, $ind; + } + $host .= $host2; + } + else { + return (); + } + + $archpath =~ s/@[a-z]$//; + + if ((my $ind = rindex ($archpath, '@q/'))>=0) { + $query = substr ($archpath, $ind+3); + $archpath = substr $archpath, 0, $ind; + if ($query =~ /^(([A-Za-z0-9_\.]|(@[0-9A-F][0-9A-F]))*)$/) { + $query=$1; + } + else { + return (); + } + $query =~ s/@([A-F0-9]{2})/chr(hex($1))/eg; + } + else { + $query = ''; + } + + if ($archpath =~ /^(([A-Za-z0-9_\.\/]|(@[0-9A-F][0-9A-F]))*)$/) { + $path=$1; + } + else { + return (); + } + $path =~ s/@([A-F0-9]{2})/chr(hex($1))/eg; + return ($prot, $host, $port, $path, $query); +} + +sub joinurl { + (my $prot, my $host, my $port, my $path, my $query) = @_; + my $url; + if ($prot =~ /^(https?)$/) { + $url=$1.'://'; + } + else { + return undef; + } + + if($host =~ /^([A-Za-z0-9\-\.]+)$/) { + $url.=$1; + } + else { + return undef; + } + + if ($port =~ /^([0-9]*)$/) { + $port = $1; + } + else { + return undef; + } + if (($port == 80 and $prot eq 'http') or ($port == 443 and $prot eq 'https')) { + $port=''; + } + if ($port) { + $url.=':'.$port; + } + + if ($path =~ /^(\/((%[0-9A-Fa-f][0-9A-Fa-f])|([A-Za-z0-9:;@&=\+\$\,-_\.~\*'\(\)\/]))*)$/) { + $url.=$1; + } + else { + return undef; + } + + if ($query) { + if ($query =~ /^(((%[0-9A-Fa-f][0-9A-Fa-f])|([A-Za-z0-9;\/\?:@&=\+\$\,\-_\.~\*'\(\)]))+)$/) { + $url.='?'.$1; + } + else { + return undef; + } + } + return $url; +} + +sub path2url { + return joinurl(path2urldiv($_[0])); +} + +1; diff --git a/settings b/settings index fb844a8..fec9225 100644 --- a/settings +++ b/settings @@ -1,6 +1,7 @@ #all directory paths must end with '/' and must already exist. bin_path = /yplom/bin/proxy/ #Where the software will be located +lib_path = /yplom/lib/proxy/ data_path = /yplom/data/proxy/ #where the proxy will remember data; subdir: #access, pass, archive log_path = /yplom/log/proxy/ #where the proxy will remember data