# http://wiki.squid-cache.org/Features/AddonHelpers#Access_Control_.28ACL.29
# http://perldoc.perl.org/perlsec.html#Taint-mode
-###ACCESS_PATH;
-###TIMEOUT_UNLOCK;
-###TIMEOUT_INACT;
+use strict;
+###LIB;
+use proxy_lib qw(access);
$|=1;
-$timeout_unlock = TIMEOUT_UNLOCK*60;
-$timeout_inact = TIMEOUT_INACT*60;
while (<STDIN>) {
- $line=$_;
+ my $line=$_;
if ($line =~ s/^([0-9]+ )//) {
print $1;
}
- $accesstime = time();
+ my $accesstime = time();
# The input line is expected to have the IP.
if ($line =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)( .*)?$/) {
- $IP = $1;
-
- # If access file doesn't exist the proxy is not unlocked for this IP.
- $accesspath=ACCESS_PATH.$IP;
- if (! (-e $accesspath)) {
- print "ERR\n";
- }
- elsif (open ($accessfile,"+<",$accesspath)) {
- # The access file has to be locked otherwise one process may read it while
- # another one updates it.
- unless (flock ($accessfile, 2)) {
- close ($accessfile);
- print "ERR\n";
- next;
- }
- $unlocktime=<$accessfile>;
- $lasttime=<$accessfile>;
-
- # First line is the timestamp of proxy unlocking. Second line is the
- # timestamp of last access.
- $unlocktime =~ s/[\r\n]//g;
- $lasttime =~ s/[\r\n]//g;
-
- # The timestamps must be numbers.
- if ($unlocktime =~ /^([0-9]+)$/) {
- $unlocktime=int($1);
- }
- else {
- close ($accessfile);
- print "ERR\n";
- next;
- }
-
- if ($lasttime =~ /^([0-9]+)$/) {
- $lasttime=int($1);
- }
- else {
- close ($accessfile);
- print "ERR\n";
- next;
- }
-
- if ((abs($accesstime-$unlocktime)>$timeout_unlock) or (abs($accesstime-$lasttime)>$timeout_inact)){
- # If too much time passed proxy is not unlocked any more. Also the
- # access file is now useless and will be removed.
- close ($accessfile);
- unlink $accesspath;
- print "ERR\n";
- }
- else {
- # the proxy is unlocked. The access file will now be updated.
- if (seek($accessfile, 0, 0)) {
- print $accessfile "$unlocktime\n$accesstime\n";
- truncate ($accessfile , tell($accessfile));
- }
- close ($accessfile);
- print "OK\n";
- }
+ if (access($accesstime, $1, '')) {
+ print "OK\n";
}
else {
print "ERR\n";
$def{'LOGS_UNCOMPRESSED'} = "use constant LOGS_UNCOMPRESSED => ".$set{'logs_uncompressed'}.";";
$def{'LOGS_TOTAL'} = "use constant LOGS_TOTAL => ".$set{'logs_total'}.";";
$def{'REWRITE_URL'} = "use constant REWRITE_URL => '".$set{'https_proxy_domain'}.":".$set{'https_proxy_port'}."';";
+$def{'LIB'} = "use lib '".$set{'lib_path'}."';";
+
$def{'PATH'} = "\$ENV{'PATH'} = '".$set{'path'}."';";
$def{'CP'} = 'CM='.$set{'cp'};
$def{'RM'} = 'RM='.$set{'rm'};
$def{'OD'} = 'OD='.$set{'bin_path'};
+$def{'LD'} = 'LD='.$set{'lib_path'};
$def{'CM'} = 'CM='.$set{'chmod'};
###RM;
###CM;
###OD;
+###LD;
-all: moveout copyout remove config.txt
+all: moveout copyout moveoutlib remove config.txt
moveout: proxy proxy.pl cleararch oldlogs rewrite access access.pl setuid exec
copyout: setuid exec
# $(CP) access.pl $(OD)
+moveoutlib: proxy_lib.pm setuid exec
+ $(MV) proxy_lib.pm $(LD)
+
setuid: proxy access
$(CM) u+s proxy access
proxy.pl: proxy.1.pl configure.pl settings
$(PL) configure.pl settings <proxy.1.pl >proxy.pl
+proxy_lib.pm: proxy_lib.1.pm configure.pl settings
+ $(PL) configure.pl settings <proxy_lib.1.pm >proxy_lib.pm
+
proxy.c: proxy.1.c configure.pl settings
$(PL) configure.pl settings <proxy.1.c >proxy.c
# https://tools.ietf.org/html/rfc3875
# https://tools.ietf.org/html/rfc2396
+use strict;
+#use warnings;
+###LIB;
+use proxy_lib qw(urldecode divideurl urldiv2path getcgi formatheader access);
use POSIX qw(strftime);
###UNLOCK_LOG;
use constant REQUEST_HEADER_BLOCK => qr/^(accept-encoding|connection|te|transfer-encoding|via)$/;
use constant RESPONSE_HEADER_BLOCK => qr/^(connection|content-encoding|content-length|trailer|transfer-encoding)$/;
-$accesstime = time();
-$timeout_unlock = TIMEOUT_UNLOCK*60;
-$timeout_inact = TIMEOUT_INACT*60;
+
+my $accesstime = time();
+
+my $URL='';
+my $IP;
+my $prot='';
+my $host='';
+my $port='';
+my $path='';
+my $query='';
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
###PATH;
}
else {
# proxy will work only if unlocked
- if(access()) {
+ if(access($accesstime,$ENV{'REMOTE_ADDR'},$ENV{'HTTP_HOST'})) {
proxy();
}
else {
# The HTTPS variable is not defined in rfc3875 (might be a portability
# problem?)
- $archive = 0; # if this http request should be archived
+ my $archive=0; # if this http request should be archived
+ my $postdata=0; # if there is data to pe posted
+ my $archpath=0;
+
+ my $respheadpath='';
+ my $respcontpath='';
+ my $postheadpath='';
+ my $postcontpath='';
+
+ my $respheadfile;
+ my $respcontfile;
+ my $postheadfile;
+ my $postcontfile;
+
+ my $postcontopen=0;
+ my $respcontopen=0;
+ my $respheadopen=0;
+
+ my $safelength=1; # if Content-length is safe to be forwarded
+ my $definedlength=0; # if Content-length is defined
+ my $safeerror=1; # if it's safe to send an error message.
+ my $lastskipped=1; # if the previous header field was not forwarded
+ my $forceexit=0; # if the proxy should exit with error
+ my $errormsg;
+
+ my $contentlength;
+
+ my @curl_arg=(CURL_PATH,'-i','-#'); # the command line arguments for curl
+ my @reqhead; # the HTTP request header
+ my @resphead;
+
+ #pipes
+ my $curloutrd;
+ my $curloutwr;
+ my $curlinrd;
+ my $curlinwr;
+ my $curlerrrd;
+ my $curlerrwr;
+
+ my $curlpid;
+
+ my $buffer;
+ my $buffer2;
# The proxy will not support request methods other than GET, POST, HEAD.
# There is no need for it.
# The URL will now be validated.
# Additionally it will be divided in parts which will be stored in variables:
# $prot (protocol), $host, $port, $path, $query.
- if (validateurl($URL)) {
+ if (($prot, $host, $port, $path, $query)=divideurl($URL)) {
$URL =~ /^(.*)$/;
$URL = $1;
}
return fail("Status: 403 Forbidden\n","403 Forbidden","The proxy does not accept port number $port because of infinite loop prevention.");
}
- @curl_arg=(CURL_PATH,'-i','-#'); # the command line arguments for curl
- @reqhead; # the HTTP request header
- foreach $envk (keys %ENV) {
+
+ foreach my $envk (keys %ENV) {
# The relevant http variables either start with HTTP_ or CONTENT_.
+ my $headarg='';
+ my $headval='';
+ my $headname='';
+
if ($envk =~ /^(HTTP_[A-Z0-9_]+)$/) {
$headname=formatheader($1);
}
# The path for archiving the HTTP request is made, the required directories
# are created.
# If this fails the request will not be archived but will still be proxied.
- $archpath = makepath($prot, $host, $port, $path, $query);
+ $archpath = urldiv2path($prot, $host, $port, $path, $query, 1);
if ($archpath) {
$archive = 1;
if ($postdata) {
# The request header is written.
if (open ($postheadfile, ">", $postheadpath)) {
- foreach $line (@reqhead) {
+ foreach my $line (@reqhead) {
print $postheadfile "$line\n";
}
- print $postheadfile "$line\n";
+ print $postheadfile "\n";
close ($postheadfile);
}
binmode($curlinwr) or die "Failed to switch stdin pipe to binary mode.\n";
binmode(STDIN) or die "Failed to switch STDIN to binary mode.\n";
- $buffer2;
-
$postcontopen = open($postcontfile, ">", $postcontpath);
if ($postcontopen){
unless (binmode($postcontfile)) {
# close ($curlinrd);
}
- $safelength=1; # if Content-length is safe to be forwarded
- $definedlength=0; # if Content-length is defined
- $safeerror=1; # if it's safe to send an error message.
- $lastskipped=1; # if the previous header field was not forwarded
- $forceexit=0; # if the proxy should exit with error
-
- if ($line = <$curloutrd>) {
+ if (defined (my $line = <$curloutrd>)) {
$line =~ s/\r?\n$//;
# The first line is the status.
if ($line =~ /^HTTP\/[0-9]+\.[0-9]+ ([0-9]+ .*)$/) {
- $status =$1;
+ my $status =$1;
@resphead=('Status: '.$status);
# If the status is not 2XX and this URL was already archived before it
}
# now read the header lines
- while ($line = <$curloutrd>) {
+ while (defined($line = <$curloutrd>)) {
$line =~ s/[\r\n]$//g;
+ my $headname='';
+ my $headval='';
# empty line (double \n) meane the end of the header.
if($line eq ''){
if ($line =~ /^([^:]*):(.*)$/) {
$headname=lc($1);
- $headvalue=$2;
+ $headval=$2;
}
else {
$lastskipped=1;
$safelength = 0;
}
if($headname eq 'content-encoding') {
- if($headvalue !~ /^[\r\n \t]*identity[\r\n \t]*$/) {
+ if($headval !~ /^[\r\n \t]*identity[\r\n \t]*$/) {
$safelength = 0;
}
}
if($headname eq 'content-length') {
$definedlength=1;
- $headvalue =~ s/^[\r\n \t]*//;
- $headvalue =~ s/[\r\n \t]*$//;
- $contentlength = int($headvalue);
+ $headval =~ s/^[\r\n \t]*//;
+ $headval =~ s/[\r\n \t]*$//;
+ $contentlength = int($headval);
}
# Some header fields should not be forwarded.
$lastskipped =1;
next;
}
- $line;
$lastskipped=0;
push @resphead, $line;
}
# Only add the Content-length header if it's safe to do so.
- if ($lengthdefined and ($safelength or ($contentlength==0))) {
+ if ($definedlength and ($safelength or ($contentlength==0))) {
push @resphead, 'Content-length: '.$contentlength;
}
}
else {
# Create the file only if there was content received. No 0-byte files.
- $firstline=1;
+ my $firstline=1;
while (read ($curloutrd,$buffer,1024)) {
unless (print (STDOUT $buffer)) {
$forceexit=1;
# Generate error message if possible.
if ($safeerror) {
$errormsg='';
- while ($line = <$curlerrrd>) {
+ while (defined(my $line = <$curlerrrd>)) {
$line =~ s/\r?\n/<br>/g;
$errormsg=$errormsg.$line;
}
}
-# This function checks if the user has unlocked the proxy. Nonzero means yes.
-sub access {
- if($ENV{'HTTP_HOST'} =~ UNLOCK_PROXY_HOST){
- return 1;
- }
-
- # Check if IP
- if ($ENV{'REMOTE_ADDR'} =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/) {
- $IP = $1;
-
- # 127.0.0.1 is always allowed. All https requests are tunnelled through
- # 127.0.0.1.
- if ($IP eq '127.0.0.1') {
- return 1;
- }
-
- # If access file doesn't exist the proxy is not unlocked for this IP.
- $accesspath=ACCESS_PATH.$IP;
- if (! (-e $accesspath)) {
- return 0;
- }
-
- elsif (open ($accessfile,"+<",$accesspath)) {
- # The access file has to be locked otherwise one process may read it while
- # another one updates it.
- unless (flock ($accessfile, 2)) {
- close ($accessfile);
- return 0;
- }
- # First line is the timestamp of proxy unlocking. Second line is the
- # timestamp of last access.
- $unlocktime=<$accessfile>;
- $lasttime=<$accessfile>;
-
- $unlocktime =~ s/[\r\n]//g;
- $lasttime =~ s/[\r\n]//g;
-
- # The timestamps must be numbers.
- if ($unlocktime =~ /^([0-9]+)$/) {
- $unlocktime=int($1);
- }
- else {
- close ($accessfile);
- return 0;
- }
-
- if ($lasttime =~ /^([0-9]+)$/) {
- $lasttime=int($1);
- }
- else {
- close ($accessfile);
- return 0;
- }
-
- if ((abs($accesstime-$unlocktime)>$timeout_unlock) or (abs($accesstime-$lasttime)>$timeout_inact)){
- # If too much time passed proxy is not unlocked any more. Also the
- # access file is now useless and will be removed.
- close ($accessfile);
- unlink $accesspath;
- return 0;
- }
- else {
- # the proxy is unlocked. The access file will now be updated.
- if (seek($accessfile, 0, 0)) {
- print $accessfile "$unlocktime\n$accesstime\n";
- truncate ($accessfile , tell($accessfile));
- }
- close ($accessfile);
- return 1;
- }
- }
- else {
- return 0;
- }
- }
- else {
- return 0;
- }
-}
-
# Function to unlock the proxy.
sub unlock {
+ my %CGI;
+ my $passpath;
+ my $passfile;
+ my $logfile;
+ my $pass;
+ my $accessfile;
+ my $accesspath;
+
if ($ENV{'REMOTE_ADDR'} =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/) {
$IP=$1;
}
}
-# The function to get CGI parameters from string.
-# Format is: name=url_encoded_value&name=url_encoded_value& ... &name=url_encoded_value
-sub getcgi {
- my $arg;
- my $varl;
- my %cgi;
- my $i = $_[0];
- $i =~ s/[\r\n]//g;
- my @s = split('&',$i);
- foreach my $l ( @s) {
- ($arg,$val)=split('=',$l);
- $cgi{$arg}=urldecode($val);
- }
- return %cgi;
-}
-
-# Function for decoding URL-encoded text
-sub urldecode {
- my $t = $_[0];
- $t =~ s/\+/ /g;
- $t =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/eg;
- return $t;
-}
-
-# Function for formatting header field names.
-# The Environment variable name is like this: HTTP_HEADER_NAME or HEADER_NAME.
-# But should be Header-Name.
-# Actually the names are case-insensitive but in practise they often look like
-# this. (Otherwise suspicious?)
-sub formatheader {
- my $t = $_[0];
- $t =~ s/^HTTP_//;
- $t = lc($t);
- $t =~ s/^([a-z])/uc($1)/e;
- $t =~ s/_([a-z])/'-'.uc($1)/eg;
- return $t;
-}
-
# Function for showing the proxy unlock form page.
# arguments: 1 - additional message (optional), 2 - additional header fields
# (optional)
print '<a href="'.UNLOCK_PROXY_URL.'">'.UNLOCK_PROXY_URL.'</a>';
print "</body></html>\n";
}
-
-# Function to show debug information. Not used any more.
-sub debag {
- print "Content-type: text/plain\n\n";
- foreach $envk (keys %ENV) {
- print "$envk = $ENV{$envk}\n";
- }
- print "\n";
-
- foreach $line ( <STDIN> ) {
- print $line;
- }
- print "\n\n";
- if ($ENV{'REQUEST_URI'} =~ /^[a-z]*(:[0-9]*)?:\/\//){
- #if
- $URL=$ENV{'REQUEST_URI'}
- }
- else{
- $URL=($ENV{'HTTPS'} eq 'on' ? 'https://' : 'http://').$ENV{'HTTP_HOST'}.$ENV{'REQUEST_URI'};
- }
- print 'URL: ',$URL,"\n";
-}
-
-# Function to check URL and divide in parts: protocol, hostname, port number,
-# path, query string.
-sub validateurl {
- my $url = $_[0];
- $prot;
- my $hostportpathquery;
- my $hostportpath;
- $query;
- my $hostport;
- $path;
- my $part;
- $host;
- $port;
-
- if ($url =~ /^([A-Za-z]+):\/\/(.*)/) {
- $prot = $1;
- $hostportpathquery = $2;
- }
- else {
- return 0;
- }
-
- if ($prot !~ /^https?$/) {
- return 0;
- }
-
- if ($hostportpathquery eq ''){
- return 0;
- }
-
- if ($hostportpathquery =~ /^([^?]+)\?(.*)$/) {
- $hostportpath = $1;
- $query = $2;
-
- if ($query !~ /^((%[0-9A-Fa-f][0-9A-Fa-f])|([A-Za-z0-9;\/\?:@&=\+\$\,\-_\.~\*'\(\)]))*$/) {
- return 0;
- }
- }
- else {
- $hostportpath = $hostportpathquery;
- $query = '';
- }
-
- if($hostportpath =~ /^([^\/]+)(\/.*)$/){
- $hostport = $1;
- $path = $2;
-
- if ($path !~ /^((%[0-9A-Fa-f][0-9A-Fa-f])|([A-Za-z0-9:;@&=\+\$\,-_\.~\*'\(\)]))*$/) {
- return 0;
- }
- $path=urldecode($path);
- }
- else {
- $hostport = $hostportpath;
- $path = '';
- }
-
- if ($hostport =~ /^(.*):([0-9]+)$/) {
- $host = $1;
- $port = $2;
- }
- else {
- $host=$hostport;
- $port='';
- }
-
- if ($host !~ /^[A-Za-z0-9\-\.]+$/) {
- return 0;
- }
-
- return 1;
-}
-
-# Function to convert URL to archive path. Also creates required directories.
-# Returns the path on success and empty string on fail.
-# Inspired by the MIRA browser.
-sub makepath {
- (my $prot, my $host, my $port, my $path, my $query) = @_;
-
- my $archpath = ARCH_PATH;
-
- # First subdir: protocol @p port_number
- if ($prot =~ /^(https?)$/) {
- $archpath .= $1;
- }
- else {
- return '';
- }
-
- if ($port =~ /^([0-9]+)$/) {
- $archpath .= "\@p$1";
- }
-
- unless (-d $archpath)
- {
- unless (mkdir $archpath) {
- return '';
- }
- }
-
- # Host name: each segment between dots is a subdir, revedrsed order, last part
- # ends with @n.
- # For example www.example.com becomes
- # ... /com/example/www@/ ...
-
- # When segment is longer than 120 characters it's divided into 64 character
- # parts ending with '-' until the last one is not longer than 120.
- # For example
- # www.aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz.com
- # becomes
- # ... /com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggg-/gggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmm-/mmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/www@n/ ...
-
- if($host =~ /^([A-Za-z0-9\-\.]+)$/) {
- $host = $1;
- }
- else {
- return '';
- }
-
- while ((my $ind = rindex ($host, '.'))>=0) {
- my $part= substr $host, $ind+1;
- $host = substr $host, 0, $ind;
-
- while (length ($part) > 120) {
- $archpath .= '/'.substr($part,0,64).'-';
- $part = substr($part,64);
- unless (-d $archpath)
- {
- unless (mkdir $archpath) {
- return '';
- }
- }
- }
- $archpath .= '/'.$part;
- unless (-d $archpath)
- {
- unless (mkdir $archpath) {
- return '';
- }
- }
- }
- while (length ($host) > 120) {
- $archpath .= '/'.substr($host,0,64).'-';
- $host = substr($host,64);
- unless (-d $archpath)
- {
- unless (mkdir $archpath) {
- return '';
- }
- }
- }
- $archpath .= '/'.$host.'@n';
- unless (-d $archpath)
- {
- unless (mkdir $archpath) {
- return '';
- }
- }
-
- # Path: each segment between '/' is subdir. The segment after the last '/' is
- # not a subdir - it is part of the file name. And it will be appended with
- # some characters outside of this function: @g, @h, @u or @v.
- # Exception: when there is a query string the last segment is a subdir too and
- # ends with q.
- # Characters that are not letters or numbers or '_' or '.' are encoded. Like
- # in URL-encoding but with '@' instead of '%'.
- # When segment is longer than 120 characters it's divided into 64 character
- # parts ending with '-' until the last one is not longer than 120.
-
- # For example:
- # /some/path/?a=1 becomes ... /some/path@q/ ...
- # /some/other-path becomes ... /some/other@2Dpath
- # /path/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/yyy
- # becomes
- # ... /path/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggg-/gggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmm-/mmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/yyy
-
- $path =~ s/^\///;
-
- while ((my $ind = index ($path, '/'))>=0) {
- my $part = substr $path, 0, $ind;
- $path= substr $path, $ind+1;
-
- $part =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg;
-
- while (length ($part) > 120) {
- $archpath .= '/'.substr($part,0,64).'-';
- $part = substr($part,64);
- unless (-d $archpath)
- {
- unless (mkdir $archpath) {
- return '';
- }
- }
- }
- $archpath .= '/'.$part;
- unless (-d $archpath)
- {
- unless (mkdir $archpath) {
- return '';
- }
- }
- }
- $path =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg;
- while (length ($path) > 120) {
- $archpath .= '/'.substr($path,0,64).'-';
- $path = substr($path,64);
- unless (-d $archpath)
- {
- unless (mkdir $archpath) {
- return '';
- }
- }
- }
-
- if ($query) {
- # query string: The last part is not a subdir - it is part of the file name.
- # And it will be appended with some characters outside of this function: @g,
- # @h, @u or @v.
- # When Query string is longer than 240 characters it's divided into 128
- # character parts ending with '-' until the last one is not longer than 240.
- # Characters that are not letters or numbers or '_' or '.' are encoded. Like
- # in URL-encoding but with '@' instead of '%'.
- # When segment is longer than 120 characters it's divided into 64 character
- # parts ending with '-' until the last one is not longer than 120.
- # The '?' at the beginning is not part of the query string.
- # For example:
- # ?a=1&b=%25%5E%26 becomes ... /a@3D1@26b@3D@2525@255E@2526
- # ?a=aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz&b=aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz
- # becomes
- # ... /a@3Daaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmm-/mmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz@26@b@3Daaaaaaaaaabbbb-/bbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz
- $query =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg;
-
- $archpath .= '/'.$path.'@q';
-
- $path = $query;
- unless (-d $archpath)
- {
- unless (mkdir $archpath) {
- return '';
- }
- }
-
- while (length ($path) > 240) {
- $archpath .= '/'.substr($path,0,128).'-';
- $path = substr($path,128);
- unless (-d $archpath)
- {
- unless (mkdir $archpath) {
- return '';
- }
- }
- }
- }
- $archpath .= '/'.$path;
- return $archpath;
-}
--- /dev/null
+# proxy_lib.pm is generated from proxy_lib.1.pm\r
+#\r
+# Some of the functions would be used by multiple bots for different platforms\r
+# using the proxy archive.\r
+\r
+package proxy_lib;\r
+\r
+use strict;\r
+#use warnings\r
+use Exporter;\r
+use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);\r
+\r
+###ARCH_PATH;\r
+###ACCESS_PATH;\r
+###TIMEOUT_UNLOCK;\r
+###TIMEOUT_INACT;\r
+###UNLOCK_PROXY_HOST;\r
+\r
+$VERSION = 0.000002;\r
+@ISA = qw(Exporter);\r
+@EXPORT = ();\r
+@EXPORT_OK = qw(access divideurl formatheader getcgi joinurl path2url url2path path2urldiv urldecode urldiv2path);\r
+%EXPORT_TAGS = ();\r
+\r
+# This function checks if the user has unlocked the proxy. Nonzero means yes.\r
+sub access { \r
+ (my $time, my $ip, my $host) = @_;\r
+ my $timeout_unlock = TIMEOUT_UNLOCK*60;\r
+ my $timeout_inact = TIMEOUT_INACT*60;\r
+ my $accesspath='';\r
+ my $accessfile;\r
+ my $lasttime;\r
+ my $unlocktime;\r
+ \r
+ if($host =~ UNLOCK_PROXY_HOST){\r
+ return 1;\r
+ }\r
+ \r
+ # Check if IP\r
+ if ($ip =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/) {\r
+ $ip = $1;\r
+ \r
+ # 127.0.0.1 is always allowed. All https requests are tunnelled through\r
+ # 127.0.0.1.\r
+ if ($ip eq '127.0.0.1') {\r
+ return 1;\r
+ }\r
+ \r
+ # If access file doesn't exist the proxy is not unlocked for this IP.\r
+ $accesspath=ACCESS_PATH.$ip;\r
+ if (! (-e $accesspath)) {\r
+ return 0;\r
+ }\r
+ \r
+ elsif (open ($accessfile,"+<",$accesspath)) {\r
+ # The access file has to be locked otherwise one process may read it while\r
+ # another one updates it.\r
+ unless (flock ($accessfile, 2)) {\r
+ close ($accessfile);\r
+ return 0;\r
+ }\r
+ # First line is the timestamp of proxy unlocking. Second line is the\r
+ # timestamp of last access.\r
+ $unlocktime=<$accessfile>;\r
+ $lasttime=<$accessfile>;\r
+\r
+ $unlocktime =~ s/[\r\n]//g;\r
+ $lasttime =~ s/[\r\n]//g;\r
+ \r
+ # The timestamps must be numbers.\r
+ if ($unlocktime =~ /^([0-9]+)$/) {\r
+ $unlocktime=int($1);\r
+ }\r
+ else {\r
+ close ($accessfile);\r
+ return 0;\r
+ }\r
+ \r
+ if ($lasttime =~ /^([0-9]+)$/) {\r
+ $lasttime=int($1);\r
+ }\r
+ else {\r
+ close ($accessfile);\r
+ return 0;\r
+ }\r
+ \r
+ if ((abs($time-$unlocktime)>$timeout_unlock) or (abs($time-$lasttime)>$timeout_inact)){\r
+ # If too much time passed proxy is not unlocked any more. Also the\r
+ # access file is now useless and will be removed.\r
+ close ($accessfile);\r
+ unlink $accesspath;\r
+ return 0;\r
+ }\r
+ else {\r
+ # the proxy is unlocked. The access file will now be updated.\r
+ if (seek($accessfile, 0, 0)) {\r
+ print $accessfile "$unlocktime\n$time\n";\r
+ truncate ($accessfile , tell($accessfile));\r
+ }\r
+ close ($accessfile);\r
+ return 1;\r
+ }\r
+ }\r
+ else {\r
+ return 0;\r
+ }\r
+ }\r
+ else {\r
+ return 0;\r
+ }\r
+}\r
+\r
+\r
+# Function to check URL and divide in parts: protocol, hostname, port number,\r
+# path, query string.\r
+sub divideurl {\r
+ my $url = $_[0];\r
+ my $prot;\r
+ my $hostportpathquery;\r
+ my $hostportpath;\r
+ my $query;\r
+ my $hostport;\r
+ my $path;\r
+ my $part;\r
+ my $host;\r
+ my $port;\r
+ \r
+ if ($url =~ /^([A-Za-z]+):\/\/(.*)/) {\r
+ $prot = $1;\r
+ $hostportpathquery = $2;\r
+ }\r
+ else {\r
+ return ();\r
+ }\r
+ \r
+ if ($prot !~ /^https?$/) {\r
+ return ();\r
+ }\r
+ \r
+ if ($hostportpathquery eq ''){\r
+ return ();\r
+ }\r
+ \r
+ if ($hostportpathquery =~ /^([^?]+)\?(.*)$/) {\r
+ $hostportpath = $1;\r
+ $query = $2;\r
+ \r
+ if ($query !~ /^((%[0-9A-Fa-f][0-9A-Fa-f])|([A-Za-z0-9;\/\?:@&=\+\$\,\-_\.~\*'\(\)]))*$/) {\r
+ return ();\r
+ }\r
+ }\r
+ else {\r
+ $hostportpath = $hostportpathquery;\r
+ $query = '';\r
+ }\r
+ \r
+ if($hostportpath =~ /^([^\/]+)(\/.*)$/){\r
+ $hostport = $1;\r
+ $path = $2;\r
+ \r
+ if ($path !~ /^((%[0-9A-Fa-f][0-9A-Fa-f])|([A-Za-z0-9:;@&=\+\$\,-_\.~\*'\(\)]))*$/) {\r
+ return ();\r
+ }\r
+ $path=urldecode($path);\r
+ }\r
+ else {\r
+ $hostport = $hostportpath;\r
+ $path = '';\r
+ }\r
+ \r
+ if ($hostport =~ /^(.*):([0-9]+)$/) {\r
+ $host = $1;\r
+ $port = $2;\r
+ }\r
+ else {\r
+ $host=$hostport;\r
+ $port='';\r
+ }\r
+ \r
+ if ($host !~ /^[A-Za-z0-9\-\.]+$/) {\r
+ return ();\r
+ }\r
+ \r
+ return ($prot, $host, $port, $path, $query);\r
+}\r
+\r
+# Function for decoding URL-encoded text\r
+sub urldecode {\r
+ my $t = $_[0];\r
+ $t =~ s/\+/ /g;\r
+ $t =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/eg;\r
+ return $t;\r
+}\r
+\r
+# The function to get CGI parameters from string.\r
+# Format is: name=url_encoded_value&name=url_encoded_value& ... &name=url_encoded_value\r
+sub getcgi {\r
+ my $arg;\r
+ my $val;\r
+ my %cgi;\r
+ my $i = $_[0];\r
+ $i =~ s/[\r\n]//g;\r
+ my @s = split('&',$i);\r
+ foreach my $l ( @s) {\r
+ ($arg,$val)=split('=',$l);\r
+ $cgi{$arg}=urldecode($val);\r
+ }\r
+ return %cgi;\r
+}\r
+\r
+# Function for formatting header field names.\r
+# The Environment variable name is like this: HTTP_HEADER_NAME or HEADER_NAME.\r
+# But should be Header-Name.\r
+# Actually the names are case-insensitive but in practise they often look like\r
+# this. (Otherwise suspicious?)\r
+sub formatheader {\r
+ my $t = $_[0];\r
+ $t =~ s/^HTTP_//;\r
+ $t = lc($t);\r
+ $t =~ s/^([a-z])/uc($1)/e;\r
+ $t =~ s/_([a-z])/'-'.uc($1)/eg;\r
+ return $t;\r
+}\r
+\r
+# Function to convert URL to archive path. Also creates required directories\r
+# if $mkdir true.\r
+# Returns the path on success and undef on fail.\r
+# Inspired by the MIRA browser.\r
+sub urldiv2path {\r
+ (my $prot, my $host, my $port, my $path, my $query, my $mkdir) = @_;\r
+ \r
+ my $archpath = ARCH_PATH;\r
+ \r
+ # First subdir: protocol @p port_number\r
+ if ($prot =~ /^(https?)$/) {\r
+ $archpath .= $1;\r
+ }\r
+ else {\r
+ return undef;\r
+ }\r
+ \r
+ unless ($port){\r
+ $port=($prot eq 'https')?'443':'80';\r
+ }\r
+ if ($port =~ /^([0-9]+)$/) {\r
+ $archpath .= "\@p$1";\r
+ }\r
+ else {\r
+ return undef;\r
+ }\r
+ \r
+ if ($mkdir and !(-d $archpath))\r
+ {\r
+ unless (mkdir $archpath) {\r
+ return undef;\r
+ }\r
+ }\r
+ \r
+ # Host name: each segment between dots is a subdir, revedrsed order, last part\r
+ # ends with @n.\r
+ # For example www.example.com becomes\r
+ # ... /com/example/www@/ ...\r
+ \r
+ # When segment is longer than 120 characters it's divided into 64 character\r
+ # parts ending with '-' until the last one is not longer than 120.\r
+ # For example\r
+ # www.aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz.com\r
+ # becomes\r
+ # ... /com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggg-/gggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmm-/mmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/www@n/ ...\r
+ \r
+ if($host =~ /^([A-Za-z0-9\-\.]+)$/) {\r
+ $host = $1;\r
+ }\r
+ else {\r
+ return undef;\r
+ }\r
+ \r
+ while ((my $ind = rindex ($host, '.'))>=0) {\r
+ my $part= substr $host, $ind+1;\r
+ $host = substr $host, 0, $ind;\r
+ \r
+ while (length ($part) > 120) {\r
+ $archpath .= '/'.substr($part,0,64).'-';\r
+ $part = substr($part,64);\r
+ if ($mkdir and !(-d $archpath))\r
+ {\r
+ unless (mkdir $archpath) {\r
+ return undef;\r
+ }\r
+ }\r
+ }\r
+ $archpath .= '/'.$part; \r
+ if ($mkdir and !(-d $archpath))\r
+ {\r
+ unless (mkdir $archpath) {\r
+ return undef;\r
+ }\r
+ }\r
+ }\r
+ while (length ($host) > 120) {\r
+ $archpath .= '/'.substr($host,0,64).'-';\r
+ $host = substr($host,64);\r
+ if ($mkdir and !(-d $archpath))\r
+ {\r
+ unless (mkdir $archpath) {\r
+ return undef;\r
+ }\r
+ }\r
+ }\r
+ $archpath .= '/'.$host.'@n'; \r
+ if ($mkdir and !(-d $archpath))\r
+ {\r
+ unless (mkdir $archpath) {\r
+ return undef;\r
+ }\r
+ }\r
+ \r
+ # Path: each segment between '/' is subdir. The segment after the last '/' is\r
+ # not a subdir - it is part of the file name. And it will be appended with\r
+ # some characters outside of this function: @g, @h, @u or @v.\r
+ # Exception: when there is a query string the last segment is a subdir too and\r
+ # ends with q.\r
+ # Characters that are not letters or numbers or '_' or '.' are encoded. Like\r
+ # in URL-encoding but with '@' instead of '%'.\r
+ # When segment is longer than 120 characters it's divided into 64 character\r
+ # parts ending with '-' until the last one is not longer than 120.\r
+ \r
+ # For example:\r
+ # /some/path/?a=1 becomes ... /some/path@q/ ...\r
+ # /some/other-path becomes ... /some/other@2Dpath\r
+ # /path/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/yyy\r
+ # becomes\r
+ # ... /path/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggg-/gggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmm-/mmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/yyy\r
+ \r
+ $path =~ s/^\///;\r
+ \r
+ while ((my $ind = index ($path, '/'))>=0) {\r
+ my $part = substr $path, 0, $ind;\r
+ $path= substr $path, $ind+1;\r
+ \r
+ $part =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg;\r
+ \r
+ while (length ($part) > 120) {\r
+ $archpath .= '/'.substr($part,0,64).'-';\r
+ $part = substr($part,64);\r
+ if ($mkdir and !(-d $archpath))\r
+ {\r
+ unless (mkdir $archpath) {\r
+ return undef;\r
+ }\r
+ }\r
+ }\r
+ $archpath .= '/'.$part; \r
+ if ($mkdir and !(-d $archpath))\r
+ {\r
+ unless (mkdir $archpath) {\r
+ return undef;\r
+ }\r
+ }\r
+ }\r
+ $path =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg;\r
+ while (length ($path) > 120) {\r
+ $archpath .= '/'.substr($path,0,64).'-';\r
+ $path = substr($path,64);\r
+ if ($mkdir and !(-d $archpath))\r
+ {\r
+ unless (mkdir $archpath) {\r
+ return undef;\r
+ }\r
+ }\r
+ }\r
+ \r
+ if ($query) {\r
+ # query string: The last part is not a subdir - it is part of the file name.\r
+ # And it will be appended with some characters outside of this function: @g,\r
+ # @h, @u or @v.\r
+ # When Query string is longer than 240 characters it's divided into 128\r
+ # character parts ending with '-' until the last one is not longer than 240.\r
+ # Characters that are not letters or numbers or '_' or '.' are encoded. Like\r
+ # in URL-encoding but with '@' instead of '%'.\r
+ # When segment is longer than 120 characters it's divided into 64 character\r
+ # parts ending with '-' until the last one is not longer than 120.\r
+ # The '?' at the beginning is not part of the query string.\r
+ # For example:\r
+ # ?a=1&b=%25%5E%26 becomes ... /a@3D1@26b@3D@2525@255E@2526\r
+ # ?a=aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz&b=aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz\r
+ # becomes\r
+ # ... /a@3Daaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmm-/mmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz@26@b@3Daaaaaaaaaabbbb-/bbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz\r
+ $query =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg;\r
+ \r
+ $archpath .= '/'.$path.'@q';\r
+ \r
+ $path = $query;\r
+ if ($mkdir and !(-d $archpath))\r
+ {\r
+ unless (mkdir $archpath) {\r
+ return undef;\r
+ }\r
+ }\r
+ \r
+ while (length ($path) > 240) {\r
+ $archpath .= '/'.substr($path,0,128).'-';\r
+ $path = substr($path,128);\r
+ if ($mkdir and !(-d $archpath))\r
+ {\r
+ unless (mkdir $archpath) {\r
+ return undef;\r
+ }\r
+ }\r
+ }\r
+ }\r
+ $archpath .= '/'.$path;\r
+ return $archpath;\r
+}\r
+\r
+sub url2path {\r
+ return urldiv2path(divideurl($_[0]));\r
+}\r
+\r
+sub path2urldiv {\r
+ my $archpath = $_[0];\r
+ my $prot;\r
+ my $host;\r
+ my $port;\r
+ my $path;\r
+ my $query;\r
+ \r
+ if (index($archpath, ARCH_PATH) != 0) {\r
+ return ();\r
+ }\r
+ $archpath = substr ($archpath, length(ARCH_PATH));\r
+ \r
+ $archpath =~ s/-\///g;\r
+\r
+ if ($archpath =~/^((https?)\@p([0-9]+)\/)/) {\r
+ $archpath = substr($archpath, length($1));\r
+ $prot = $2;\r
+ $port = $3;\r
+ }\r
+ else {\r
+ return ();\r
+ }\r
+ \r
+ if ($archpath =~ /^(([A-Za-z0-9\-\/]+)\@n)\//) {\r
+ $archpath = substr($archpath, length($1));\r
+ my $host2 = $2;\r
+ \r
+ while ((my $ind = rindex ($host2, '/'))>=0) {\r
+ $host .= (substr $host2, $ind+1).'.';\r
+ $host2 = substr $host2, 0, $ind;\r
+ }\r
+ $host .= $host2;\r
+ }\r
+ else {\r
+ return ();\r
+ }\r
+ \r
+ $archpath =~ s/@[a-z]$//;\r
+ \r
+ if ((my $ind = rindex ($archpath, '@q/'))>=0) {\r
+ $query = substr ($archpath, $ind+3);\r
+ $archpath = substr $archpath, 0, $ind;\r
+ if ($query =~ /^(([A-Za-z0-9_\.]|(@[0-9A-F][0-9A-F]))*)$/) {\r
+ $query=$1;\r
+ }\r
+ else {\r
+ return ();\r
+ }\r
+ $query =~ s/@([A-F0-9]{2})/chr(hex($1))/eg;\r
+ }\r
+ else {\r
+ $query = '';\r
+ }\r
+ \r
+ if ($archpath =~ /^(([A-Za-z0-9_\.\/]|(@[0-9A-F][0-9A-F]))*)$/) {\r
+ $path=$1;\r
+ }\r
+ else {\r
+ return ();\r
+ }\r
+ $path =~ s/@([A-F0-9]{2})/chr(hex($1))/eg;\r
+ return ($prot, $host, $port, $path, $query);\r
+}\r
+\r
+sub joinurl {\r
+ (my $prot, my $host, my $port, my $path, my $query) = @_;\r
+ my $url;\r
+ if ($prot =~ /^(https?)$/) {\r
+ $url=$1.'://';\r
+ }\r
+ else {\r
+ return undef;\r
+ }\r
+ \r
+ if($host =~ /^([A-Za-z0-9\-\.]+)$/) {\r
+ $url.=$1;\r
+ }\r
+ else {\r
+ return undef;\r
+ }\r
+ \r
+ if ($port =~ /^([0-9]*)$/) {\r
+ $port = $1;\r
+ }\r
+ else {\r
+ return undef;\r
+ }\r
+ if (($port == 80 and $prot eq 'http') or ($port == 443 and $prot eq 'https')) {\r
+ $port='';\r
+ }\r
+ if ($port) {\r
+ $url.=':'.$port;\r
+ }\r
+ \r
+ if ($path =~ /^(\/((%[0-9A-Fa-f][0-9A-Fa-f])|([A-Za-z0-9:;@&=\+\$\,-_\.~\*'\(\)\/]))*)$/) {\r
+ $url.=$1;\r
+ }\r
+ else {\r
+ return undef;\r
+ }\r
+ \r
+ if ($query) {\r
+ if ($query =~ /^(((%[0-9A-Fa-f][0-9A-Fa-f])|([A-Za-z0-9;\/\?:@&=\+\$\,\-_\.~\*'\(\)]))+)$/) {\r
+ $url.='?'.$1;\r
+ }\r
+ else {\r
+ return undef;\r
+ }\r
+ }\r
+ return $url;\r
+}\r
+\r
+sub path2url {\r
+ return joinurl(path2urldiv($_[0]));\r
+}\r
+\r
+1;\r
#all directory paths must end with '/' and must already exist.
bin_path = /yplom/bin/proxy/ #Where the software will be located
+lib_path = /yplom/lib/proxy/
data_path = /yplom/data/proxy/ #where the proxy will remember data; subdir:
#access, pass, archive
log_path = /yplom/log/proxy/ #where the proxy will remember data