Archiving finished for now.

author b <b@05ce6ebb-7522-4a6e-a768-0026ae12be9f>

Wed, 18 Nov 2015 21:59:21 +0000 (21:59 +0000)

committer b <b@05ce6ebb-7522-4a6e-a768-0026ae12be9f>

Wed, 18 Nov 2015 21:59:21 +0000 (21:59 +0000)
author b <b@05ce6ebb-7522-4a6e-a768-0026ae12be9f>
Wed, 18 Nov 2015 21:59:21 +0000 (21:59 +0000)
committer b <b@05ce6ebb-7522-4a6e-a768-0026ae12be9f>
Wed, 18 Nov 2015 21:59:21 +0000 (21:59 +0000)
diff --git a/access.1.c b/access.1.c

index 160d4b86986fbf3ab6e66165e8a3be09abed1987..442b8276a94c2878074b97a7c3c762af1f85d5b7 100644 (file)
--- a/access.1.c
+++ b/access.1.c
@@ -1,4 +1,9 @@
-//The SETUID wrapper.
+// access.c is generated from access.1.c
+//
+// This is the wrapper for access.pl.
+// It's run with SETUID to have accesss to some files where the www server
+// should not. That's why it has a C wrapper. In modern systems running scripts
+// directly with SETUID is considered unsafe and not allowed.
  
  #include <unistd.h>
  #include <stdio.h>
diff --git a/access.1.pl b/access.1.pl

index fee2b6366d63711ea21caa1bf809c78f13ef5736..4cd3e2f09097dd6bd2ae7dd3bc2c2f01df345917 100644 (file)
--- a/access.1.pl
+++ b/access.1.pl
@@ -1,5 +1,30 @@
  ###PERL;
  
+# access.pl is generated from access.1.pl
+#
+# This is the Squid helper program used for access control, it detects if the
+# proxy is unlocked for the user and tells Squid if it can open the tunnel or
+# not.
+#
+# It's run with SETUID to have accesss to some files where the Squid server
+# should not. That's why it has a C wrapper. In modern systems running scripts
+# directly with SETUID is considered unsafe and not allowed.
+# When running with SETUID the program also has to deal with Perl's built-in
+# securuity.
+#
+# Any data comming from standard input, files, environment variables, command
+# line is treated by Perl as unsafe. Any data created by operations on unsafe
+# data is also unsafe. Perl will not allow to use such data to open files (for
+# writing), launch programs, etc.
+# That's why the data has to be validated and made safe, like this:
+# if ($variable =~/^(some-regex)$/) {
+#   $variable = $1;
+# }
+#
+# see also:
+# http://wiki.squid-cache.org/Features/AddonHelpers#Access_Control_.28ACL.29
+# http://perldoc.perl.org/perlsec.html#Taint-mode
+
  ###ACCESS_PATH;
  ###TIMEOUT_UNLOCK;
  ###TIMEOUT_INACT;
@@ -15,15 +40,19 @@ while (<STDIN>) {
         }
         
         $accesstime = time();
-       # print '+++ '.$line."\n";
         
+       # The input line is expected to have the IP.
         if ($line =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)( .*)?$/) {
                 $IP = $1;
+               
+               # If access file doesn't exist the proxy is not unlocked for this IP.
                 $accesspath=ACCESS_PATH.$IP;
                 if (! (-e $accesspath)) {
-                       print "ERR'n";
+                       print "ERR\n";
                 }
                 elsif (open ($accessfile,"+<",$accesspath)) {
+                       # The access file has to be locked otherwise one process may read it while
+                       # another one updates it.
                         unless (flock ($accessfile, 2)) {
                                 close ($accessfile);
                                 print "ERR\n";
@@ -32,9 +61,12 @@ while (<STDIN>) {
                         $unlocktime=<$accessfile>;
                         $lasttime=<$accessfile>;
                         
+                       # First line is the timestamp of proxy unlocking. Second line is the
+                       # timestamp of last access.
                         $unlocktime =~ s/[\r\n]//g;
                         $lasttime =~ s/[\r\n]//g;
                         
+                       # The timestamps must be numbers.
                         if ($unlocktime =~ /^([0-9]+)$/) {
                                 $unlocktime=int($1);
                         }
@@ -54,11 +86,14 @@ while (<STDIN>) {
                         }
                         
                         if ((abs($accesstime-$unlocktime)>$timeout_unlock) or (abs($accesstime-$lasttime)>$timeout_inact)){
+                               # If too much time passed proxy is not unlocked any more. Also the
+                               # access file is now useless and will be removed.
                                 close ($accessfile);
                                 unlink $accesspath;
                                 print "ERR\n";
                         }
                         else {
+                               # the proxy is unlocked. The access file will now be updated.
                                 if (seek($accessfile, 0, 0)) {
                                         print $accessfile "$unlocktime\n$accesstime\n";
                                         truncate ($accessfile , tell($accessfile));
@@ -68,7 +103,7 @@ while (<STDIN>) {
                         }
                 }
                 else {
-                       print "BH\n";
+                       print "ERR\n";
                 }
         }
         else {
diff --git a/cleararch.1.pl b/cleararch.1.pl

new file mode 100644 (file)

index 0000000..63c1b5f
--- /dev/null
+++ b/cleararch.1.pl
@@ -0,0 +1,67 @@
+###PERL;\r
+\r
+# cleararch is generated from cleararch.1.pl.\r
+#\r
+# This script recursively clears the archive from old files. Otherwise it would\r
+# grow forever.\r
+\r
+use POSIX qw(strftime);\r
+\r
+###ARCH_PATH;\r
+###TIMEOUT_ARCH;\r
+\r
+$time = time();\r
+print strftime("%d.%m.%Y %H:%M:%S", gmtime($time))."\n";\r
+cleardir(ARCH_PATH);\r
+\r
+sub cleardir {\r
+       (my $dirpath) = @_;\r
+       my $dir;\r
+       my $returnvalue = 1;\r
+       my $subpath;\r
+       my $subpathfull;\r
+       my @stat;\r
+       unless ( opendir ($dir, $dirpath)) {\r
+               print "Failed to open: $dirpath\/\n";\r
+               return 0;\r
+       }\r
+       while ($subpath = readdir $dir) {\r
+               $subpathfull=$dirpath.$subpath;\r
+               if ($subpath =~ /^\.\.?$/) {\r
+                       next;\r
+               }\r
+               if (-f $subpathfull) {\r
+                       unless (@stat = stat $subpathfull) {\r
+                               print "Stat fail: $subpathfull\n";\r
+                               $returnvalue = 0;\r
+                               next;\r
+                       }\r
+                       if (abs($time - $stat[9]) > TIMEOUT_ARCH) {\r
+                               unless (unlink $subpathfull) {\r
+                                       print "Failed to remove: $subpathfull\n";\r
+                                       $returnvalue = 0;\r
+                                       next;\r
+                               }\r
+                               print "Removed: $subpathfull\n";\r
+                       }\r
+                       else {\r
+                               $returnvalue = 0;\r
+                       }\r
+               }\r
+               elsif (-d $subpathfull) {\r
+                       unless (cleardir($subpathfull.'/')) {\r
+                               $returnvalue = 0;\r
+                               next;\r
+                       }\r
+                       unless (rmdir($subpathfull.'/')) {\r
+                               print "Failed to remove: $subpathfull\/\n";\r
+                               $returnvalue = 0;\r
+                               next;\r
+                       }\r
+                       print "Removed: $subpathfull\/\n";\r
+               }\r
+       }\r
+       closedir($dir);\r
+       return $returnvalue;\r
+}\r
+print "\n";\r
diff --git a/config.1.txt b/config.1.txt

index f1c983bee7f6e941283230bc508f6a99f17b8b46..f5b5182ffdff29e2b114acd8317c8d474d63b6d0 100644 (file)
--- a/config.1.txt
+++ b/config.1.txt
@@ -1,3 +1,4 @@
+# config.txt is generated from config.1.txt
  ################################################################################
  #copy this to your Apache2 configuration,
  #remember to make the server listen on these ports:
@@ -23,8 +24,8 @@
  ###CGI_ALIAS;
         
         SSLEngine on
-       SSLCertificateFile    /etc/apache2/ssl/botm.crt
-       SSLCertificateKeyFile /etc/apache2/ssl/botm.key
+###SSL_CERT;
+###SSL_KEY;
         
         #SSLOptions +StdEnvVars
         
@@ -61,3 +62,5 @@ coredump_dir /var/spool/squid
  ################################################################################
  #Copy this to your crontab:
  ###RM_ACCESS_CRONTAB;
+###CLEARARCH_CRONTAB;
+###OLDLOGS_CRONTAB;
diff --git a/configure.pl b/configure.pl

index 3fcd1e7914600296ab2e35a8bf98a05e4e238ed6..a0900bc48b234c0f6c7aed73e64599e33629ef6e 100755 (executable)
--- a/configure.pl
+++ b/configure.pl
@@ -1,5 +1,12 @@
  #!/usr/bin/perl
  
+# The proxy software, when run on a server, will use different directories,
+# host names, tcp ports, etc. than the server on which this software was
+# originally written.
+# These things are defined in the file 'settings'.
+# This script is called from the makefile. It reads the settings file and
+# inserts the information in the source files.
+
  unless ($ARGV[0]) {
         print STDERR "Configfile missing.\n";
         exit 1;
@@ -10,6 +17,8 @@ unless (open $configfile, "<", $ARGV[0]) {
         exit 2;
  }
  
+# Read the config file, line format:
+# some_name = some value # some comment
  while ($line = <$configfile>) {
         $line =~ s/[\r\n]//g;
         $line =~ s/#.*$//;
@@ -22,9 +31,13 @@ while ($line = <$configfile>) {
  }
  close ($configfile);
  
+# Now generate things to be inserted.
+
  $def{'UNLOCK_LOG'}        = "use constant UNLOCK_LOG         => '".$set{'log_path'}."unlock.log';";
  $def{'CURL_PATH'}         = "use constant CURL_PATH          => '".$set{'curl'}."';";
+$def{'GZIP_PATH'}         = "use constant GZIP_PATH          => '".$set{'gzip'}."';";
  $def{'DATA_PATH'}         = "use constant DATA_PATH          => '".$set{'data_path'}."';";
+$def{'LOG_PATH'}          = "use constant LOG_PATH           => '".$set{'log_path'}."';";
  $def{'TEMP_PATH'}         = "use constant TEMP_PATH          => '".$set{'tmp_path'}."';";
  $def{'PASS_PATH'}         = "use constant PASS_PATH          => '".$set{'data_path'}."pass/';";
  $def{'ARCH_PATH'}         = "use constant ARCH_PATH          => '".$set{'data_path'}."archive/';";
@@ -37,6 +50,10 @@ $def{'BLOCK_HOST'}        = "use constant BLOCK_HOST         => qr/".$set{'block
  $def{'BLOCK_PORT'}        = 'use constant BLOCK_PORT         => qr/^((0*'.$set{'http_proxy_port'}.')|('.$set{'https_proxy_port'}.')|('.$set{'ssl_proxy_port'}.'))$/;';
  $def{'TIMEOUT_UNLOCK'}    = "use constant TIMEOUT_UNLOCK     => ".$set{'timeout_unlock'}.";";
  $def{'TIMEOUT_INACT'}     = "use constant TIMEOUT_INACT      => ".$set{'timeout_inact'}.";";
+$def{'TIMEOUT_ARCH'}      = "use constant TIMEOUT_ARCH       => ".$set{'timeout_arch'}.";";
+$def{'LOG_SIZE_LIMIT'}    = "use constant LOG_SIZE_LIMIT     => ".$set{'log_size_limit'}.";";
+$def{'LOGS_UNCOMPRESSED'} = "use constant LOGS_UNCOMPRESSED  => ".$set{'logs_uncompressed'}.";";
+$def{'LOGS_TOTAL'}        = "use constant LOGS_TOTAL         => ".$set{'logs_total'}.";";
  $def{'REWRITE_URL'}       = "use constant REWRITE_URL        => '".$set{'https_proxy_domain'}.":".$set{'https_proxy_port'}."';";
  
  $def{'PATH'} = "\$ENV{'PATH'} = '".$set{'path'}."';";
@@ -68,7 +85,11 @@ $def{'HTTP_PORT_SSL'}     = 'http_port '.$set{'ssl_proxy_port'};
  $def{'EXTERNAL_ACL'}      = 'external_acl_type unlocked-check ttl=15 negative_ttl=0 %SRC '.$set{'bin_path'}.'access';
  $def{'EXTERNAL_REWRITE'}  = 'url_rewrite_program '.$set{'bin_path'}.'rewrite';
  $def{'UNLOCK_DOMAIN_ACL'} = 'acl unlockdomain dstdomain '.$set{'unlock_domain'};
+
  $def{'RM_ACCESS_CRONTAB'} = $set{'rm_access_crontab'}.' '.$set{'rm'}.' '.$set{'data_path'}.'access/*';
+$def{'CLEARARCH_CRONTAB'} = $set{'cleararch_crontab'}.' '.$set{'bin_path'}.'cleararch >> '.$set{'log_path'}.'cleararch.log';
+$def{'OLDLOGS_CRONTAB'}   = $set{'oldlogs_crontab'}.' '.$set{'bin_path'}.'oldlogs';
+
  
  $def{'CC'} = 'CC='.$set{'gcc'};
  $def{'CF'} = 'CF='.$set{'c_flags'};
@@ -80,7 +101,9 @@ $def{'OD'} = 'OD='.$set{'bin_path'};
  $def{'CM'} = 'CM='.$set{'chmod'};
  
  
-
+# Now go through input file, find lines to be replaced. Format:
+# ###SOME_NAME;
+# If found - replace.
  
  while ($line = <STDIN>) {
         $line =~ s/[\r\n]//g;
@@ -91,7 +114,3 @@ while ($line = <STDIN>) {
                 print "$line\n";
         }
  }
-
-       
-       
-       
-\ No newline at end of file
diff --git a/make.sh b/make.sh

index ed4980f3426a6a8c7a89205329b9b002d672ace7..f584d54854b5d2ce396f35dd5970c1e7b42e9e4a 100644 (file)
--- a/make.sh
+++ b/make.sh
@@ -1,5 +1,9 @@
  #!/bin/sh
  
+# This is the script for making the software. Normally, the makefile is used for
+# this purpose. But the makefile has to be generated first.
+# This script generates the makefile and then uses it and finally removes it.
+
  set -x
  perl configure.pl settings <makefile.1.mak >makefile
  make
diff --git a/makefile.1.mak b/makefile.1.mak

index f3ad9954f977a2a61787ef33b35e9a061d3a42a1..fe4e8cc8d05daf77e9ad075ab89498e9f48d1247 100644 (file)
--- a/makefile.1.mak
+++ b/makefile.1.mak
@@ -1,3 +1,5 @@
+# makefile is generated from makefile.1.mak.
+
  ###CC;
  ###CF;
  ###PL;
@@ -10,8 +12,8 @@
  all: moveout copyout remove config.txt
  
  
-moveout: proxy proxy.pl rewrite access access.pl   setuid exec
-       $(MV) proxy proxy.pl access access.pl rewrite $(OD)
+moveout: proxy proxy.pl cleararch oldlogs rewrite access access.pl   setuid exec
+       $(MV) proxy proxy.pl cleararch oldlogs access access.pl rewrite $(OD)
  
  copyout:   setuid exec
  #       $(CP) access.pl $(OD)
@@ -19,8 +21,8 @@ copyout:   setuid exec
  setuid: proxy access
         $(CM) u+s proxy access
  
-exec: rewrite access.pl proxy.pl
-       $(CM) +x rewrite access.pl proxy.pl
+exec: cleararch oldlogs rewrite access.pl proxy.pl
+       $(CM) +x cleararch oldlogs rewrite access.pl proxy.pl
  
  remove: proxy proxy.c access access.c   copyout moveout setuid exec 
         $(RM) proxy.c access.c
@@ -48,6 +50,11 @@ access.c: access.1.c   configure.pl settings
  access: access.c
         $(CC) $(CF) -o access access.c
  
+cleararch: cleararch.1.pl   configure.pl settings
+       $(PL) configure.pl settings <cleararch.1.pl >cleararch
+
+oldlogs: oldlogs.1.pl   configure.pl settings
+       $(PL) configure.pl settings <oldlogs.1.pl >oldlogs
  
  config.txt: config.1.txt   configure.pl settings
         $(PL) configure.pl settings <config.1.txt >config.txt
diff --git a/oldlogs.1.pl b/oldlogs.1.pl

new file mode 100644 (file)

index 0000000..65e9497
--- /dev/null
+++ b/oldlogs.1.pl
@@ -0,0 +1,77 @@
+###PERL;\r
+\r
+# oldlogs is generated from oldlogs.1.pl.\r
+#\r
+# This script renames log files if they are big enough.\r
+# Compresses or removes older log files.\r
+\r
+###LOG_PATH;\r
+###GZIP_PATH;\r
+###LOG_SIZE_LIMIT;\r
+###LOGS_TOTAL;\r
+###LOGS_UNCOMPRESSED;\r
+\r
+if ( opendir ($dir, LOG_PATH)) {\r
+       while ($subpath = readdir $dir) {\r
+               if ($subpath !~ /\.log$/) {\r
+                       next;\r
+               }\r
+               $fullpath=LOG_PATH.$subpath;\r
+               unless (-f $fullpath) {\r
+                       next;\r
+               }\r
+               unless (@stat = stat($fullpath)) {\r
+                       next;\r
+               }\r
+               if ($stat[7] > LOG_SIZE_LIMIT) {\r
+                       movelog($fullpath,0,0);\r
+               }\r
+               \r
+       }\r
+       closedir($dir);\r
+}\r
+\r
+sub movelog {\r
+       (my $path, my $number, my $gz) = @_;\r
+       my $nextgz = 0;\r
+       my $thispath;\r
+       my $nextpath;\r
+       my $nextnumber=$number+1;\r
+       my @gzip_arg = (GZIP_PATH, '-q', '-9','-f');\r
+       \r
+       $thispath = $path.(($number != 0)?'.'.$number.($gz?'.gz':''):'');\r
+       if ($number == LOGS_TOTAL) {\r
+               if (unlink $thispath) {\r
+                       return 1;\r
+               }\r
+               else {\r
+                       return 0;\r
+               }\r
+       }\r
+       if ($number == LOGS_UNCOMPRESSED) {\r
+               $nextgz=1;\r
+               $nextpath = $path.'.'.$nextnumber.'.gz';\r
+       }\r
+       else {\r
+               $nextpath = $path.'.'.$nextnumber.($gz?'.gz':'');\r
+       }\r
+       \r
+       if (-e $nextpath) {\r
+               unless (movelog($path,$nextnumber,($nextgz or $gz)?1:0)) {\r
+                       return 0;\r
+               }\r
+       }\r
+       \r
+       if ($nextgz) {\r
+               push @gzip_arg, $thispath;\r
+               unless (! system (@gzip_arg)) {\r
+                       return 0;\r
+               }\r
+               $thispath .= '.gz';\r
+       }\r
+       \r
+       unless (rename ($thispath, $nextpath)) {\r
+               return 0;\r
+       }\r
+       return 1;\r
+}\r
diff --git a/proxy.1.c b/proxy.1.c

index edf32c400fced9f2083f8148ff7868f65a7b036f..85fa4aa91fc4ebe922803d443a77aa054b728f4c 100644 (file)
--- a/proxy.1.c
+++ b/proxy.1.c
@@ -1,4 +1,9 @@
-//The SETUID wrapper.
+// proxy.c is generated from proxy.1.c
+//
+// This is the wrapper for proxy.pl.
+// It's run with SETUID to have accesss to some files where the www server
+// should not. That's why it has a C wrapper. In modern systems running scripts
+// directly with SETUID is considered unsafe and not allowed.
  
  #include <unistd.h>
  #include <stdio.h>
diff --git a/proxy.1.pl b/proxy.1.pl

index bf53924e899dd01711e772e06d471477ddf5be1c..114c55b7fb1607bcb1585859a858f2cef5bf069d 100755 (executable)
--- a/proxy.1.pl
+++ b/proxy.1.pl
@@ -1,5 +1,38 @@
  ###PERL;
  
+# proxy.pl is generated from proxy.1.pl
+#
+# This is the proxy software. It's launched as a CGI program by the http server.
+#
+# The proxy:
+# - determines information about the http(s) request,
+# - checks if user is allowed to use it,
+# - forwards the requests using curl,
+# - sends the response back to the user,
+# - saves a redundant copy of the response.
+#
+# It's run with SETUID to have accesss to some files where the www server
+# should not. That's why it has a C wrapper. In modern systems running scripts
+# directly with SETUID is considered unsafe and not allowed.
+# When running with SETUID the program also has to deal with Perl's built-in
+# securuity.
+#
+# Any data comming from standard input, files, environment variables, command
+# line is treated by Perl as unsafe. Any data created by operations on unsafe
+# data is also unsafe. Perl will not allow to use such data to open files (for
+# writing), launch programs, etc.
+# That's why the data has to be validated and made safe, like this:
+# if ($variable =~/^(some-regex)$/) {
+#   $variable = $1;
+# }
+#
+# see also:
+# http://perldoc.perl.org/perlsec.html#Taint-mode
+# https://tools.ietf.org/html/rfc2616
+# https://tools.ietf.org/html/rfc3875
+# https://tools.ietf.org/html/rfc2396
+
+
  use POSIX qw(strftime);
  
  ###UNLOCK_LOG;
@@ -27,10 +60,12 @@ $timeout_inact = TIMEOUT_INACT*60;
  delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
  ###PATH;
  
+# If the special URL is used, instaed of proxying there will be proxy unlocking.
  if (($ENV{'HTTP_HOST'} =~ UNLOCK_PROXY_HOST) and (urldecode($ENV{'PATH_INFO'}) =~ UNLOCK_PROXY_PATH)){
         unlock();
  }
  else {
+       # proxy will work only if unlocked
         if(access()) {
                 proxy();
         }
@@ -39,15 +74,23 @@ else {
         }
  }
  
+# The proxying function
  sub proxy {
-       # srand(time()+$$);
+       # The required information is stored in environment variables (rfc3875).
+       # The HTTPS variable is not defined in rfc3875 (might be a portability
+       # problem?)
         
-       $archive = 0;
+       $archive = 0; # if this http request should be archived
         
+       # The proxy will not support request methods  other than GET, POST, HEAD.
+       # There is no need for it. 
         if ($ENV{'REQUEST_METHOD'} !~ /^(HEAD|GET|POST)$/) {
                 return fail("Status: 405 Method Not Allowed\nAllow: GET, POST, HEAD\n","405 Method Not Allowed","The proxy does not support the $ENV{'REQUEST_METHOD'} method.");
         }
         
+       # The proxy may be presented with either a complete URL (when it's a HTTP
+       # proxy) or just the path (when it's a HTTPS proxy).
+       # In this case the  URL has to be reconstructed.
         if ($ENV{'REQUEST_URI'} =~ /^[a-z]*(:[0-9]*)?:\/\/.*$/){
                 $URL=$ENV{'REQUEST_URI'};
         }
@@ -55,7 +98,10 @@ sub proxy {
                 $URL=($ENV{'HTTPS'} eq 'on' ? 'https://' : 'http://').$ENV{'HTTP_HOST'}.$ENV{'REQUEST_URI'};
         }
         
-       if (validateurl($URL)) { #validate URL against illegal characters
+       # The URL will now be validated.
+       # Additionally it will be divided in parts which will be stored in variables:
+       # $prot (protocol), $host, $port, $path, $query.
+       if (validateurl($URL)) {
                 $URL =~ /^(.*)$/;
                 $URL = $1;
         }
@@ -63,35 +109,45 @@ sub proxy {
                 return fail("Status: 400 Bad Request\n","400 Bad Request","Invalid URL: $URL.");
         }
         
+       # If port not defined default value is used.
         if ($port eq ''){
                 $port=($ENV{'HTTPS'} eq 'on' ? '443' : '80');
         }
-               
-       if ($host =~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) {#IP instead of hostname
+       
+       # The host can be a hostname or IP.
+       # A request to a private IP number gives users access to the proxy's local
+       # network. This should not be allowed.
+       if ($host =~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) {
                 if ($host =~ /^((0*10\..*)|(0*127\..*)|(0*172\.0*((1[6-9])|(2[0-9])|(3[0-1]))\..*)|(0*192\.0*168\..*))$/) {
                         return fail("Status: 403 Forbidden\n","403 Forbidden","The proxy does not accept a private IP address $host as hostname.");
                 }
         }
         
+       # Of course, if there is a web server inside the local network and is accessed
+       # by its hostname it can be accessible. But the server which runs the proxy
+       # can have some hostnames defined that point to computers in local network
+       # that are not available from outside. These should be blocked as well.
         if ($host =~ BLOCK_HOST) {
                 return fail("Status: 403 Forbidden\n","403 Forbidden","The proxy does not accept $host as hostname.");
         }
         
+       # The proxy will block any access with a port number that is used by the
+       # proxy. Otherwise it would be possible to create an infinite loop in which
+       # the request is proxied back to the proxy again. Of course, to create such a
+       # loop not only the port number but also the host name is required. But is not
+       # always easy (or possible) to predict if a hostname will point to the proxy
+       # or not. Blocking port numbers is easier.
+       #That's why the proxy should not be installed on the default port numbers, 80
+       # or 443 because then it will not work!
         if ($port =~ BLOCK_PORT) {
                 return fail("Status: 403 Forbidden\n","403 Forbidden","The proxy does not accept port number $port because of infinite loop prevention.");
         }
         
-       # do {
-               # $temppath=TEMP_PATH.int(rand(100000000)).'_';
-               # $headpath=$temppath.'h';
-               # $downpath=$temppath.'d';
-               # $uppath=$temppath.'u';
-       # } while (( -e $headpath) or ( -e $downpath) or ( -e $uppath));
-       
-       @curl_arg=(CURL_PATH,'-i','-#'); #-s
-       @reqhead;
+       @curl_arg=(CURL_PATH,'-i','-#'); # the command line arguments for curl
+       @reqhead;                        # the HTTP request header
         
         foreach $envk (keys %ENV) { 
+               # The relevant http variables either start with HTTP_ or CONTENT_.
                 if ($envk =~ /^(HTTP_[A-Z0-9_]+)$/) {
                         $headname=formatheader($1);
                 }
@@ -101,12 +157,15 @@ sub proxy {
                 else {
                         next;
                 }
+               # test against illegal characters
                 if ($ENV{$envk} =~ /^([\x20-\x7e]*)$/) {
                         $headval=$1;
                 }
                 else {
                         next;
                 }
+               
+               # Some header fields should not be forwarded.
                 if(lc($headname) =~ REQUEST_HEADER_BLOCK) {
                         next;
                 }
@@ -117,6 +176,10 @@ sub proxy {
                 push @curl_arg, $headarg;
                 push @reqhead, $headarg;
         }
+       
+       # curl uses its default values of User-agent and Accept header fields unless
+       # they are defined in the command line.
+       # If the original request doesn't contain these fields this will undefine them:
         unless (exists $ENV{'HTTP_USER_AGENT'}) {
                 push @curl_arg, '-H';
                 push @curl_arg, 'User-Agent:';
@@ -131,65 +194,45 @@ sub proxy {
         }
         
         if ($ENV{'REQUEST_METHOD'} eq 'POST'){
-               # binmode(STDIN) or return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to switch STDIN to binary mode.");
-               # open($upfile,">",$uppath) or return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to create temporary file.");
-               # binmode ($upfile) or return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to switch temporary file to binary mode.");
-               
-               # $buffer;
-               # while (read (STDIN,$buffer,65536)) {
-                       # unless (print ($upfile $buffer)) {
-                               # return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to write to temporary file.");
-                       # }
-               # }
-               # close ($upfile);
-               # $craetedup=1;
-               
-               $postdata=1;
+               $postdata=1;     # if there is data to pe posted
                 push @curl_arg, '--data-binary';
-               # push @curl_arg, '@'.$uppath;
                 push @curl_arg, '@-';
-               
         }
         
-       $archpath = makepath($prot, $host, $port, $path);
+       # The path for archiving the HTTP request is made, the required directories
+       # are created.
+       # If this fails the request will not be archived but will still be proxied.
+       $archpath = makepath($prot, $host, $port, $path, $query);
         if ($archpath) {
                 $archive = 1;
-               if ($query or $postdata) {
+               if ($postdata) {
+                       # If this is a POST request the URL is not enough to identify it. To the
+                       # path will be added a number. Also the request header and post data will
+                       # be archived and not only the response header and response data.
                         for (my $i = 0; ;++$i) {
                                 $respcontpath=$archpath.'@v-'.$i;
                                 $respheadpath=$archpath.'@h-'.$i;
                                 $postheadpath=$archpath.'@g-'.$i;
                                 $postcontpath=$archpath.'@u-'.$i;
-                               $postquerpath=$archpath.'@q-'.$i;
                                 
-                               if ((! -e $respcontpath) and (! -e $respheadpath) and (! -e $postheadpath) and (! -e $postcontpath) and (! -e $postquerpath)) {
+                               if ((! -e $respcontpath) and (! -e $respheadpath) and (! -e $postheadpath) and (! -e $postcontpath)) {
                                         last;
                                 }
                         }
                         
-                       if ($query) {
-                               if (open ($postquerfile, ">", $postquerpath)) {
-                                       print $postquerfile "$query\n";
-                                       close($postquerfile);
-                               }
-                       }
-                       
-                       if ($postdata) {
-                               if (open ($postheadfile, ">", $postheadpath)) {
-                                       foreach $line (@reqhead) {
-                                               print $postheadfile "$line\n";
-                                       }
+                       # The request header is written.
+                       if (open ($postheadfile, ">", $postheadpath)) {
+                               foreach $line (@reqhead) {
                                         print $postheadfile "$line\n";
-                                       close ($postheadfile);
                                 }
+                               print $postheadfile "$line\n";
+                               close ($postheadfile);
                         }
+               
                 }
                 else {
                         $respcontpath=$archpath.'@v';
                         $respheadpath=$archpath.'@h';
-                       $postheadpath='';
-                       $postcontpath='';
-                       $postquerpath='';
                 }
         }
         else {
@@ -198,12 +241,14 @@ sub proxy {
         
         push @curl_arg, $URL;
         
+       # The pipes for communication with curl are created.
         pipe ($curloutrd, $curloutwr) or return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to create stdout pipe.");
         pipe ($curlerrrd, $curlerrwr) or return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to create stderr pipe.");
         if ($postdata){
                 pipe ($curlinrd, $curlinwr) or return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to create stdin pipe.");
         }
         
+       # curl, the program that will send the forwarded request is launched here
         $curlpid = fork();
         unless (defined $curlpid) {
                 return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","fork() failed.");
@@ -212,6 +257,7 @@ sub proxy {
                 close ($curloutrd);
                 close ($curlerrrd);
                 
+               # The input and outputs of curl are redirected to pipes.
                 open(STDERR, ">&=" . fileno($curlerrwr)) or die "Failed to open stderr pipe.\n";
                 open(STDOUT, ">&=" . fileno($curloutwr)) or die "Failed to open stdout pipe.\n";
                 
@@ -228,11 +274,19 @@ sub proxy {
         close ($curlerrwr);
         
         if($postdata) {
+               # If this  is a POST request the post data is available on the standard
+               # input. It will be piped to curl and written to a file (if successfully
+               # opened).
+               # Originally this part would be in a separate thread but:
+               # 1. It didn't work. Not sure why.
+               # 2. It was not necaessary.
+               
                 # $curlpidin = fork();
                 # unless (defined $curlpidin) {
                         # $forceexit=1;
                 # }
                 # unless ($curlpidin) {
+
                         close($curlinrd);
                         binmode($curlinwr) or die "Failed to switch stdin pipe to binary mode.\n";
                         binmode(STDIN) or die "Failed to switch STDIN to binary mode.\n";
@@ -259,36 +313,49 @@ sub proxy {
                         if ($postcontopen) {
                                 close($postcontfile);
                         }
+
                         # exit 0;
                 # }
                 # close ($curinwr);
                 # close ($curlinrd);
         }
         
-       $safelength=1;
-       $definedlength=0;
-       $safeerror=1;
-       $lastskipped=1;
-       $forceexit=0;
+       $safelength=1;    # if Content-length is safe to be forwarded 
+       $definedlength=0; # if Content-length is defined
+       $safeerror=1;     # if it's safe to send an error message.
+       $lastskipped=1;   # if the previous header field was not forwarded
+       $forceexit=0;     # if the proxy should exit with error
         
         if ($line = <$curloutrd>) {
                 $line =~ s/\r?\n$//;
+               # The first line is the status. 
                 if ($line =~ /^HTTP\/[0-9]+\.[0-9]+ ([0-9]+ .*)$/) {
                         $status =$1;
                         @resphead=('Status: '.$status);
                         
+                       # If the status is not 2XX and this URL was already archived before it
+                       # should not be overwritten.
+                       # Otherwise it can happen that, for example, after refreshing the content
+                       # previously downloaded will be overwritten with a "304 Not Modified" and
+                       # lost.
+                       
                         if(($status !~ /^2[0-9][0-9]/) and (-e $respcontpath)){
                                 $archive=0;
                         }
                         
+                       # now read the header lines
                         while ($line = <$curloutrd>) {
                                 $line =~ s/[\r\n]$//g;
                                 
+                               # empty line (double \n) meane the end of the header.
                                 if($line eq ''){
                                         last;
                                 }
                                 
+                               # If line starts with space or tab, it's a continuation of the previous
+                               # line.
                                 if($line =~ /^[ \t](.*)$/){
+                                       # Continuation of a skipped line should be skipped too.
                                         if ($lastskipped) {
                                                 next;
                                         }
@@ -308,6 +375,9 @@ sub proxy {
                                         next;
                                 }
                                 
+                               # If one of these header fields is present it can not be assumed that
+                               # the received value of Content-length is also correct for the forwarded
+                               # content.
                                 if($headname eq 'transfer-encoding') {
                                         $safelength = 0;
                                 }
@@ -322,6 +392,8 @@ sub proxy {
                                         $headvalue =~ s/[\r\n \t]*$//;
                                         $contentlength = int($headvalue);
                                 }
+                               
+                               # Some header fields should not be forwarded.
                                 if($headname =~ RESPONSE_HEADER_BLOCK) {
                                         $lastskipped =1;
                                         next;
@@ -331,12 +403,16 @@ sub proxy {
                                 push @resphead, $line;
                         }
                         
-                       if ($lengthdefined and ($lengthsafe or ($contentlength==0))) {
+                       # Only add the Content-length header if it's safe to do so.
+                       if ($lengthdefined and ($safelength or ($contentlength==0))) {
                                 push @resphead, 'Content-length: '.$contentlength;
                         }
                         
+                       # After forwarding the response header it's too late to send any error
+                       # messages.
                         $safeerror=0;
                         
+                       # Forward the header and safe it to file if possible and allowed.
                         if ($archive) {
                                 $respheadopen = open($respheadfile, ">", $respheadpath);
                         }
@@ -352,15 +428,16 @@ sub proxy {
                                 close $respheadfile;
                         }
                         
+                       # Now forward the actual content and safe it to file if possible and
+                       # allowed.
                         if ( ! binmode ($curloutrd)) {
-                               # print "FAIL 1\n";
                                 $forceexit=1;
                         }
                         elsif ( ! binmode (STDOUT) ) {
-                               # print "FAIL 2\n";
                                 $forceexit=1;
                         }
                         else {
+                               # Create the file only if there was content received. No 0-byte files.
                                 $firstline=1;
                                 while (read ($curloutrd,$buffer,1024)) {
                                         unless (print (STDOUT $buffer)) {
@@ -392,9 +469,11 @@ sub proxy {
                 }
         }
         
+       # Ok, done. Now wait for curl to exit. (Should have already happened).
         
         waitpid($curlpid,0);
         if ($? != 0) {
+               # Generate error message if possible.
                 if ($safeerror) {
                         $errormsg='';
                                 while ($line = <$curlerrrd>) {
@@ -416,6 +495,7 @@ sub proxy {
                 # }
         # }
         
+       # Send error message if it;s safe to do so.
         if($safeerror){
                 return fail("Status: 500 Internal Server Error\n","500 Internal Server Error",$errormsg);
         }
@@ -425,31 +505,44 @@ sub proxy {
         
  }
  
-sub access { #kind of doubles the functionality of access.pl but for http
+# This function checks if the user has unlocked the proxy. Nonzero means yes.
+sub access { 
         if($ENV{'HTTP_HOST'} =~ UNLOCK_PROXY_HOST){
                 return 1;
         }
+       
+       # Check if IP
         if ($ENV{'REMOTE_ADDR'} =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/) {
                 $IP = $1;
+               
+               # 127.0.0.1 is always allowed. All https requests are tunnelled through
+               # 127.0.0.1.
                 if ($IP eq '127.0.0.1') {
                         return 1;
                 }
+               
+               # If access file doesn't exist the proxy is not unlocked for this IP.
                 $accesspath=ACCESS_PATH.$IP;
                 if (! (-e $accesspath)) {
                         return 0;
                 }
                 
                 elsif (open ($accessfile,"+<",$accesspath)) {
+                       # The access file has to be locked otherwise one process may read it while
+                       # another one updates it.
                         unless (flock ($accessfile, 2)) {
                                 close ($accessfile);
                                 return 0;
                         }
+                       # First line is the timestamp of proxy unlocking. Second line is the
+                       # timestamp of last access.
                         $unlocktime=<$accessfile>;
                         $lasttime=<$accessfile>;
  
                         $unlocktime =~ s/[\r\n]//g;
                         $lasttime =~ s/[\r\n]//g;
                         
+                       # The timestamps must be numbers.
                         if ($unlocktime =~ /^([0-9]+)$/) {
                                 $unlocktime=int($1);
                         }
@@ -467,11 +560,14 @@ sub access { #kind of doubles the functionality of access.pl but for http
                         }
                         
                         if ((abs($accesstime-$unlocktime)>$timeout_unlock) or (abs($accesstime-$lasttime)>$timeout_inact)){
+                               # If too much time passed proxy is not unlocked any more. Also the
+                               # access file is now useless and will be removed.
                                 close ($accessfile);
                                 unlink $accesspath;
                                 return 0;
                         }
                         else {
+                               # the proxy is unlocked. The access file will now be updated.
                                 if (seek($accessfile, 0, 0)) {
                                         print $accessfile "$unlocktime\n$accesstime\n";
                                         truncate ($accessfile , tell($accessfile));
@@ -489,15 +585,21 @@ sub access { #kind of doubles the functionality of access.pl but for http
         }
  }
  
+# Function to unlock the proxy.
  sub unlock {
         if ($ENV{'REMOTE_ADDR'} =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/) {
                 $IP=$1;
         }
         
+       # If the request method is GET the CGI parameters must be taken from the
+       # query string (part of URL).
         if ($ENV{'REQUEST_METHOD'} eq 'GET') {
                 %CGI=getcgi($ENV{'QUERY_STRING'});
         }
+       # If request method is POST the CGI parameters must be taken from standard
+       # input.
         elsif ($ENV{'REQUEST_METHOD'} eq 'POST'){
+               #Check content-type. Multipart is not supported.
                 if ($ENV{'CONTENT_TYPE'} eq 'application/x-www-form-urlencoded'){
                         %CGI=getcgi( <STDIN> );
                 }
@@ -509,12 +611,13 @@ sub unlock {
                 return unlockpage("Unsupported method: $ENV{'REQUEST_METHOD'}.","Status: 405 Method Not Allowed\nAllow: GET, POST\n");
         }
         
+       # If there was an IP in the form it must be used instead.
         if ($CGI{'ip'} =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/) {
                 $IP=$1;
         }
         
         if ($IP eq '') {
-               return unlockpage("$Invalid IP.","Status: 403 Forbidden\n");
+               return unlockpage("Invalid IP.","Status: 403 Forbidden\n");
         }
         
         if ($CGI{'username'} eq ''){
@@ -530,21 +633,25 @@ sub unlock {
         }
         
         $passpath = PASS_PATH.$CGI{'username'};
-       
+       # Open file to read user's password.
         open($passfile, "<", $passpath) or return unlockpage('Wrong username or password.',"Status: 403 Forbidden\n");
         $pass = <$passfile>;
         close($passfile);
         $pass =~ s/[\r\n]//g;
+       # The password is stored in URL-encoded form.
         $pass = urldecode($pass);
         
         if ($pass ne $CGI{'password'}){
                 return unlockpage('Wrong username or password.',"Status: 403 Forbidden\n");
         }
         
+       # All unlocking access must be logged: time, IP, user. The server owner must
+       # be able to know who did what with the proxy (in  case of legal problems).
         open ($logfile, ">>", UNLOCK_LOG) or return unlockpage("Couldn't log your action.","Status: 500 Internal Server Error\n");
         print $logfile strftime("%d.%m.%Y %H:%M:%S", gmtime($accesstime))."  $IP  $CGI{'username'}\n";
         close($logfile);
         
+       # Ok, unlocked. Create access file.
         $accesspath=ACCESS_PATH.$IP;
         open ($accessfile,">",$accesspath) or return unlockpage("$accesspath","Status: 403 Forbidden\n");
         print $accessfile "$accesstime\n$accesstime\n";
@@ -554,6 +661,8 @@ sub unlock {
         
  }
  
+# The function to get CGI parameters from string.
+# Format is: name=url_encoded_value&name=url_encoded_value& ... &name=url_encoded_value
  sub getcgi {
         my $arg;
         my $varl;
@@ -568,6 +677,7 @@ sub getcgi {
         return %cgi;
  }
  
+# Function for decoding URL-encoded text
  sub urldecode {
         my $t = $_[0];
         $t =~ s/\+/ /g;
@@ -575,6 +685,11 @@ sub urldecode {
         return $t;
  }
  
+# Function for formatting header field names.
+# The Environment variable name is like this: HTTP_HEADER_NAME or HEADER_NAME.
+# But should be Header-Name.
+# Actually the names are case-insensitive but in practise they often look like
+# this. (Otherwise suspicious?)
  sub formatheader {
         my $t = $_[0];
         $t =~ s/^HTTP_//;
@@ -584,6 +699,9 @@ sub formatheader {
         return $t;
  }
  
+# Function for showing the proxy unlock form page.
+# arguments: 1 - additional message (optional), 2 - additional header fields
+# (optional)
  sub unlockpage {
         (my $message, my $header)=@_;
         if($header ne ''){
@@ -616,6 +734,8 @@ sub unlockpage {
         print "</ul></body></html>\n";
  }
  
+# Function for showing error messages.
+# Arguments: 1 - header fields, 2 - page title, 3 - error message text.
  sub fail {
         (my $header, my $title, my $message)=@_;
         if($header ne ''){
@@ -638,6 +758,7 @@ sub fail {
         print "</body></html>\n";
  }
  
+# Function to show the message after unlocking the proxy.
  sub unlockedpage {
         print "Content-type: text/html\n\n";
         print '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "">';
@@ -654,6 +775,7 @@ sub unlockedpage {
         print "</ul></body></html>\n";
  }
  
+# Function to show the message when proxy not unlocked.
  sub noaccess {
         print "Status: 403 Forbidden\n;";
         print "Content-type: text/html\n\n";
@@ -669,6 +791,7 @@ sub noaccess {
         print "</body></html>\n";
  }
  
+# Function to show debug information. Not used any more.
  sub debag {
         print "Content-type: text/plain\n\n";
         foreach $envk (keys %ENV) { 
@@ -690,6 +813,8 @@ sub debag {
         print 'URL: ',$URL,"\n";
  }
  
+# Function to check URL and divide in parts: protocol, hostname, port number,
+# path, query string.
  sub validateurl {
         my $url = $_[0];
         $prot;
@@ -761,12 +886,15 @@ sub validateurl {
         return 1;       
  }
  
+# Function to convert URL to archive path. Also creates required directories.
+# Returns the path on success and empty string on fail.
+# Inspired by the MIRA browser.
  sub makepath {
-       #inspired by the MIRA browser!
-       (my $prot, my $host, my $port, my $path) = @_;
+       (my $prot, my $host, my $port, my $path, my $query) = @_;
         
         my $archpath = ARCH_PATH;
         
+       # First subdir: protocol @p port_number
         if ($prot =~ /^(https?)$/) {
                 $archpath .= $1;
         }
@@ -785,6 +913,18 @@ sub makepath {
                 }
         }
         
+       # Host name: each segment between dots is a subdir, revedrsed order, last part
+       # ends with @n.
+       # For example www.example.com becomes
+       # ... /com/example/www@/ ...
+       
+       # When segment is longer than 120 characters it's divided into 64 character
+       # parts ending with '-' until the last one is not longer than 120.
+       # For example
+       # www.aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz.com
+       # becomes
+       # ... /com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggg-/gggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmm-/mmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/www@n/ ...
+       
         if($host =~ /^([A-Za-z0-9\-\.]+)$/) {
                 $host = $1;
         }
@@ -832,13 +972,30 @@ sub makepath {
                 }
         }
         
+       # Path: each segment between '/' is subdir. The segment after the last '/' is
+       # not a subdir - it is part of the file name. And it will be appended with
+       # some characters outside of this function: @g, @h, @u or @v.
+       # Exception: when there is a query string the last segment is a subdir too and
+       # ends  with q.
+       # Characters that are not letters or numbers or '_' or '.' are encoded. Like
+       # in URL-encoding but with '@' instead of '%'.
+       # When segment is longer than 120 characters it's divided into 64 character
+       # parts ending with '-' until the last one is not longer than 120.
+       
+       # For example:
+       # /some/path/?a=1  becomes  ... /some/path@q/ ...
+       # /some/other-path  becomes  ... /some/other@2Dpath
+       # /path/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/yyy
+       # becomes
+       # ... /path/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggg-/gggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmm-/mmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/yyy
+       
         $path =~ s/^\///;
         
         while ((my $ind = index ($path, '/'))>=0) {
                 my $part = substr $path, 0, $ind;
                 $path= substr $path, $ind+1;
                 
-               $part =~ s/[^A-Za-z0-9_\.]/sprintf ("@%02X",ord($1))/eg;
+               $part =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg;
                 
                 while (length ($part) > 120) {
                         $archpath .= '/'.substr($part,0,64).'-';
@@ -858,7 +1015,7 @@ sub makepath {
                         }
                 }
         }
-       $path =~ s/[^A-Za-z0-9_\.]/sprintf ("@%02X",ord($1))/eg;
+       $path =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg;
         while (length ($path) > 120) {
                 $archpath .= '/'.substr($path,0,64).'-';
                 $path = substr($path,64);
@@ -869,6 +1026,46 @@ sub makepath {
                         }
                 }
         }
+       
+       if ($query) {
+               # query string: The last part is not a subdir - it is part of the file name.
+               # And it will be appended with some characters outside of this function: @g,
+               # @h, @u or @v.
+               # When Query string is longer than 240 characters it's divided into 128
+               # character parts ending with '-' until the last one is not longer than 240.
+               # Characters that are not letters or numbers or '_' or '.' are encoded. Like
+               # in URL-encoding but with '@' instead of '%'.
+               # When segment is longer than 120 characters it's divided into 64 character
+               # parts ending with '-' until the last one is not longer than 120.
+               # The '?' at the beginning is not part of the query string.
+               # For example:
+               # ?a=1&b=%25%5E%26 becomes ... /a@3D1@26b@3D@2525@255E@2526
+               # ?a=aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz&b=aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz
+               # becomes
+               # ... /a@3Daaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmm-/mmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz@26@b@3Daaaaaaaaaabbbb-/bbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz
+               $query =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg;
+               
+               $archpath .= '/'.$path.'@q';
+               
+               $path = $query;
+               unless (-d $archpath)
+               {
+                       unless (mkdir $archpath) {
+                               return '';
+                       }
+               }
+               
+               while (length ($path) > 240) {
+                       $archpath .= '/'.substr($path,0,128).'-';
+                       $path = substr($path,128);
+                       unless (-d $archpath)
+                       {
+                               unless (mkdir $archpath) {
+                                       return '';
+                               }
+                       }
+               }
+       }
         $archpath .= '/'.$path;
         return $archpath;
  }
diff --git a/rewrite.1.pl b/rewrite.1.pl

index 1578162cfe1bbd4f637d9d061d98a80fe765a069..1ffe9edb08c0f9871adb1cae69dc79812590079c 100755 (executable)
--- a/rewrite.1.pl
+++ b/rewrite.1.pl
@@ -1,5 +1,13 @@
  ###PERL;
  
+# rewrite is generated from rewrite.1.pl
+#
+# This is the Squid helper program used for redirection. It always redirects to
+# the https proxy.
+#
+# see also:
+# http://wiki.squid-cache.org/Features/AddonHelpers#URL_Re-Writing_.28Mangling.29
+
  ###REWRITE_URL;
  
  $|=1;
diff --git a/settings b/settings

index 09502afc116b611cf82653747712c8a74477fccd..fb844a81da0b3a2669e480cf145595651e946932 100644 (file)
--- a/settings
+++ b/settings
@@ -20,10 +20,11 @@ server_admin       = bicyclesonthemoon@chirpingmustard.info
  
  # No matter what key you use there will be ALWAYS an unavoidable certifficate
  # mismatch warning. Because the proxy does an equivalent to a MITM attack. 
-ssl_key  = /etc/apache2/ssl/botm.key
-ssl_cert = /etc/apache2/ssl/botm.crt
+ssl_key  = /etc/apache2/ssl/proxy.key
+ssl_cert = /etc/apache2/ssl/proxy.crt
  
-#doesn't have to be a real domain
+# The domain and path used for proxy unlocking
+# doesn't have to be a real domain
  unlock_domain       = yplom.bicyclesonthemoon.info
  unlock_path         = /proxy/unlock
  unlock_domain_regex = ^yplom\.bicyclesonthemoon\.info(:[0-9]*)?$
@@ -37,6 +38,9 @@ block_host_regex = ^(localhost|(botcastle[0-9]*))$
  timeout_unlock = 90
  timeout_inact  = 15
  
+#Time in seconds
+timeout_arch = 172800 # how old files must be to safely remove them
+
  path    = /usr/local/bin:/usr/bin:/bin
  perl    = /usr/bin/perl
  curl    = /usr/bin/curl
@@ -45,6 +49,14 @@ cp      = /bin/cp
  mv      = /bin/mv
  rm      = /bin/rm
  gcc     = /usr/bin/gcc
+gzip    = /bin/gzip
  c_flags = -g -Wall
  
-rm_access_crontab = 0 0 * * * #How often to remove leftover unlock info.
+log_size_limit    = 65536 # How big can a log file be
+logs_uncompressed =     2 # How many uncompressed old logs to keep
+logs_total        =    10 # How many old logs to keep 
+
+rm_access_crontab =  0 1 * * * # How often to remove leftover unlock info.
+cleararch_crontab = 30 0 * * * # How often to clear the archive from old files.
+oldlogs_crontab   =  0 0 * * * # How often to deal with old logs
+# Not a good idea to launch oldlogs just after cleararch, I think.
author	b <b@05ce6ebb-7522-4a6e-a768-0026ae12be9f>
	Wed, 18 Nov 2015 21:59:21 +0000 (21:59 +0000)
committer	b <b@05ce6ebb-7522-4a6e-a768-0026ae12be9f>
	Wed, 18 Nov 2015 21:59:21 +0000 (21:59 +0000)
access.1.c		patch \| blob \| history
access.1.pl		patch \| blob \| history
cleararch.1.pl	[new file with mode: 0644]	patch \| blob
config.1.txt		patch \| blob \| history
configure.pl		patch \| blob \| history
make.sh		patch \| blob \| history
makefile.1.mak		patch \| blob \| history
oldlogs.1.pl	[new file with mode: 0644]	patch \| blob
proxy.1.c		patch \| blob \| history
proxy.1.pl		patch \| blob \| history
rewrite.1.pl		patch \| blob \| history
settings		patch \| blob \| history