From 6e3fdb354832c88e2b130fd673f2e092d486f0bf Mon Sep 17 00:00:00 2001 From: b Date: Wed, 18 Nov 2015 21:59:21 +0000 Subject: [PATCH] Archiving finished for now. Removing old files from the archive. Renaming/compressing/removing of old logs. Added comments to explain the code. git-svn-id: svn://botcastle1b/yplom/proxy@10 05ce6ebb-7522-4a6e-a768-0026ae12be9f --- access.1.c | 7 +- access.1.pl | 41 ++++++- cleararch.1.pl | 67 ++++++++++ config.1.txt | 7 +- configure.pl | 29 ++++- make.sh | 4 + makefile.1.mak | 15 ++- oldlogs.1.pl | 77 ++++++++++++ proxy.1.c | 7 +- proxy.1.pl | 327 +++++++++++++++++++++++++++++++++++++++---------- rewrite.1.pl | 8 ++ settings | 20 ++- 12 files changed, 524 insertions(+), 85 deletions(-) create mode 100644 cleararch.1.pl create mode 100644 oldlogs.1.pl diff --git a/access.1.c b/access.1.c index 160d4b8..442b827 100644 --- a/access.1.c +++ b/access.1.c @@ -1,4 +1,9 @@ -//The SETUID wrapper. +// access.c is generated from access.1.c +// +// This is the wrapper for access.pl. +// It's run with SETUID to have accesss to some files where the www server +// should not. That's why it has a C wrapper. In modern systems running scripts +// directly with SETUID is considered unsafe and not allowed. #include #include diff --git a/access.1.pl b/access.1.pl index fee2b63..4cd3e2f 100644 --- a/access.1.pl +++ b/access.1.pl @@ -1,5 +1,30 @@ ###PERL; +# access.pl is generated from access.1.pl +# +# This is the Squid helper program used for access control, it detects if the +# proxy is unlocked for the user and tells Squid if it can open the tunnel or +# not. +# +# It's run with SETUID to have accesss to some files where the Squid server +# should not. That's why it has a C wrapper. In modern systems running scripts +# directly with SETUID is considered unsafe and not allowed. +# When running with SETUID the program also has to deal with Perl's built-in +# securuity. 
+# +# Any data comming from standard input, files, environment variables, command +# line is treated by Perl as unsafe. Any data created by operations on unsafe +# data is also unsafe. Perl will not allow to use such data to open files (for +# writing), launch programs, etc. +# That's why the data has to be validated and made safe, like this: +# if ($variable =~/^(some-regex)$/) { +# $variable = $1; +# } +# +# see also: +# http://wiki.squid-cache.org/Features/AddonHelpers#Access_Control_.28ACL.29 +# http://perldoc.perl.org/perlsec.html#Taint-mode + ###ACCESS_PATH; ###TIMEOUT_UNLOCK; ###TIMEOUT_INACT; @@ -15,15 +40,19 @@ while () { } $accesstime = time(); - # print '+++ '.$line."\n"; + # The input line is expected to have the IP. if ($line =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)( .*)?$/) { $IP = $1; + + # If access file doesn't exist the proxy is not unlocked for this IP. $accesspath=ACCESS_PATH.$IP; if (! (-e $accesspath)) { - print "ERR'n"; + print "ERR\n"; } elsif (open ($accessfile,"+<",$accesspath)) { + # The access file has to be locked otherwise one process may read it while + # another one updates it. unless (flock ($accessfile, 2)) { close ($accessfile); print "ERR\n"; @@ -32,9 +61,12 @@ while () { $unlocktime=<$accessfile>; $lasttime=<$accessfile>; + # First line is the timestamp of proxy unlocking. Second line is the + # timestamp of last access. $unlocktime =~ s/[\r\n]//g; $lasttime =~ s/[\r\n]//g; + # The timestamps must be numbers. if ($unlocktime =~ /^([0-9]+)$/) { $unlocktime=int($1); } @@ -54,11 +86,14 @@ while () { } if ((abs($accesstime-$unlocktime)>$timeout_unlock) or (abs($accesstime-$lasttime)>$timeout_inact)){ + # If too much time passed proxy is not unlocked any more. Also the + # access file is now useless and will be removed. close ($accessfile); unlink $accesspath; print "ERR\n"; } else { + # the proxy is unlocked. The access file will now be updated. 
if (seek($accessfile, 0, 0)) { print $accessfile "$unlocktime\n$accesstime\n"; truncate ($accessfile , tell($accessfile)); @@ -68,7 +103,7 @@ while () { } } else { - print "BH\n"; + print "ERR\n"; } } else { diff --git a/cleararch.1.pl b/cleararch.1.pl new file mode 100644 index 0000000..63c1b5f --- /dev/null +++ b/cleararch.1.pl @@ -0,0 +1,67 @@ +###PERL; + +# cleararch is generated from cleararch.1.pl. +# +# This script recursively clears the archive from old files. Otherwise it would +# grow forever. + +use POSIX qw(strftime); + +###ARCH_PATH; +###TIMEOUT_ARCH; + +$time = time(); +print strftime("%d.%m.%Y %H:%M:%S", gmtime($time))."\n"; +cleardir(ARCH_PATH); + +sub cleardir { + (my $dirpath) = @_; + my $dir; + my $returnvalue = 1; + my $subpath; + my $subpathfull; + my @stat; + unless ( opendir ($dir, $dirpath)) { + print "Failed to open: $dirpath\/\n"; + return 0; + } + while ($subpath = readdir $dir) { + $subpathfull=$dirpath.$subpath; + if ($subpath =~ /^\.\.?$/) { + next; + } + if (-f $subpathfull) { + unless (@stat = stat $subpathfull) { + print "Stat fail: $subpathfull\n"; + $returnvalue = 0; + next; + } + if (abs($time - $stat[9]) > TIMEOUT_ARCH) { + unless (unlink $subpathfull) { + print "Failed to remove: $subpathfull\n"; + $returnvalue = 0; + next; + } + print "Removed: $subpathfull\n"; + } + else { + $returnvalue = 0; + } + } + elsif (-d $subpathfull) { + unless (cleardir($subpathfull.'/')) { + $returnvalue = 0; + next; + } + unless (rmdir($subpathfull.'/')) { + print "Failed to remove: $subpathfull\/\n"; + $returnvalue = 0; + next; + } + print "Removed: $subpathfull\/\n"; + } + } + closedir($dir); + return $returnvalue; +} +print "\n"; diff --git a/config.1.txt b/config.1.txt index f1c983b..f5b5182 100644 --- a/config.1.txt +++ b/config.1.txt @@ -1,3 +1,4 @@ +# config.txt is generated from config.1.txt ################################################################################ #copy this to your Apache2 configuration, #remember to make the 
server listen on these ports: @@ -23,8 +24,8 @@ ###CGI_ALIAS; SSLEngine on - SSLCertificateFile /etc/apache2/ssl/botm.crt - SSLCertificateKeyFile /etc/apache2/ssl/botm.key +###SSL_CERT; +###SSL_KEY; #SSLOptions +StdEnvVars @@ -61,3 +62,5 @@ coredump_dir /var/spool/squid ################################################################################ #Copy this to your crontab: ###RM_ACCESS_CRONTAB; +###CLEARARCH_CRONTAB; +###OLDLOGS_CRONTAB; diff --git a/configure.pl b/configure.pl index 3fcd1e7..a0900bc 100755 --- a/configure.pl +++ b/configure.pl @@ -1,5 +1,12 @@ #!/usr/bin/perl +# The proxy software, when run on a server, will use different directories, +# host names, tcp ports, etc. than the server on which this software was +# originally written. +# These things are defined in the file 'settings'. +# This script is called from the makefile. It reads the settings file and +# inserts the information in the source files. + unless ($ARGV[0]) { print STDERR "Configfile missing.\n"; exit 1; @@ -10,6 +17,8 @@ unless (open $configfile, "<", $ARGV[0]) { exit 2; } +# Read the config file, line format: +# some_name = some value # some comment while ($line = <$configfile>) { $line =~ s/[\r\n]//g; $line =~ s/#.*$//; @@ -22,9 +31,13 @@ while ($line = <$configfile>) { } close ($configfile); +# Now generate things to be inserted. 
+ $def{'UNLOCK_LOG'} = "use constant UNLOCK_LOG => '".$set{'log_path'}."unlock.log';"; $def{'CURL_PATH'} = "use constant CURL_PATH => '".$set{'curl'}."';"; +$def{'GZIP_PATH'} = "use constant GZIP_PATH => '".$set{'gzip'}."';"; $def{'DATA_PATH'} = "use constant DATA_PATH => '".$set{'data_path'}."';"; +$def{'LOG_PATH'} = "use constant LOG_PATH => '".$set{'log_path'}."';"; $def{'TEMP_PATH'} = "use constant TEMP_PATH => '".$set{'tmp_path'}."';"; $def{'PASS_PATH'} = "use constant PASS_PATH => '".$set{'data_path'}."pass/';"; $def{'ARCH_PATH'} = "use constant ARCH_PATH => '".$set{'data_path'}."archive/';"; @@ -37,6 +50,10 @@ $def{'BLOCK_HOST'} = "use constant BLOCK_HOST => qr/".$set{'block $def{'BLOCK_PORT'} = 'use constant BLOCK_PORT => qr/^((0*'.$set{'http_proxy_port'}.')|('.$set{'https_proxy_port'}.')|('.$set{'ssl_proxy_port'}.'))$/;'; $def{'TIMEOUT_UNLOCK'} = "use constant TIMEOUT_UNLOCK => ".$set{'timeout_unlock'}.";"; $def{'TIMEOUT_INACT'} = "use constant TIMEOUT_INACT => ".$set{'timeout_inact'}.";"; +$def{'TIMEOUT_ARCH'} = "use constant TIMEOUT_ARCH => ".$set{'timeout_arch'}.";"; +$def{'LOG_SIZE_LIMIT'} = "use constant LOG_SIZE_LIMIT => ".$set{'log_size_limit'}.";"; +$def{'LOGS_UNCOMPRESSED'} = "use constant LOGS_UNCOMPRESSED => ".$set{'logs_uncompressed'}.";"; +$def{'LOGS_TOTAL'} = "use constant LOGS_TOTAL => ".$set{'logs_total'}.";"; $def{'REWRITE_URL'} = "use constant REWRITE_URL => '".$set{'https_proxy_domain'}.":".$set{'https_proxy_port'}."';"; $def{'PATH'} = "\$ENV{'PATH'} = '".$set{'path'}."';"; @@ -68,7 +85,11 @@ $def{'HTTP_PORT_SSL'} = 'http_port '.$set{'ssl_proxy_port'}; $def{'EXTERNAL_ACL'} = 'external_acl_type unlocked-check ttl=15 negative_ttl=0 %SRC '.$set{'bin_path'}.'access'; $def{'EXTERNAL_REWRITE'} = 'url_rewrite_program '.$set{'bin_path'}.'rewrite'; $def{'UNLOCK_DOMAIN_ACL'} = 'acl unlockdomain dstdomain '.$set{'unlock_domain'}; + $def{'RM_ACCESS_CRONTAB'} = $set{'rm_access_crontab'}.' '.$set{'rm'}.' 
'.$set{'data_path'}.'access/*'; +$def{'CLEARARCH_CRONTAB'} = $set{'cleararch_crontab'}.' '.$set{'bin_path'}.'cleararch >> '.$set{'log_path'}.'cleararch.log'; +$def{'OLDLOGS_CRONTAB'} = $set{'oldlogs_crontab'}.' '.$set{'bin_path'}.'oldlogs'; + $def{'CC'} = 'CC='.$set{'gcc'}; $def{'CF'} = 'CF='.$set{'c_flags'}; @@ -80,7 +101,9 @@ $def{'OD'} = 'OD='.$set{'bin_path'}; $def{'CM'} = 'CM='.$set{'chmod'}; - +# Now go through input file, find lines to be replaced. Format: +# ###SOME_NAME; +# If found - replace. while ($line = ) { $line =~ s/[\r\n]//g; @@ -91,7 +114,3 @@ while ($line = ) { print "$line\n"; } } - - - - \ No newline at end of file diff --git a/make.sh b/make.sh index ed4980f..f584d54 100644 --- a/make.sh +++ b/make.sh @@ -1,5 +1,9 @@ #!/bin/sh +# This is the script for making the software. Normally, the makefile is used for +# this purpose. But the makefile has to be generated first. +# This script generates the makefile and then uses it and finally removes it. + set -x perl configure.pl settings makefile make diff --git a/makefile.1.mak b/makefile.1.mak index f3ad995..fe4e8cc 100644 --- a/makefile.1.mak +++ b/makefile.1.mak @@ -1,3 +1,5 @@ +# makefile is generated from makefile.1.mak. 
+ ###CC; ###CF; ###PL; @@ -10,8 +12,8 @@ all: moveout copyout remove config.txt -moveout: proxy proxy.pl rewrite access access.pl setuid exec - $(MV) proxy proxy.pl access access.pl rewrite $(OD) +moveout: proxy proxy.pl cleararch oldlogs rewrite access access.pl setuid exec + $(MV) proxy proxy.pl cleararch oldlogs access access.pl rewrite $(OD) copyout: setuid exec # $(CP) access.pl $(OD) @@ -19,8 +21,8 @@ copyout: setuid exec setuid: proxy access $(CM) u+s proxy access -exec: rewrite access.pl proxy.pl - $(CM) +x rewrite access.pl proxy.pl +exec: cleararch oldlogs rewrite access.pl proxy.pl + $(CM) +x cleararch oldlogs rewrite access.pl proxy.pl remove: proxy proxy.c access access.c copyout moveout setuid exec $(RM) proxy.c access.c @@ -48,6 +50,11 @@ access.c: access.1.c configure.pl settings access: access.c $(CC) $(CF) -o access access.c +cleararch: cleararch.1.pl configure.pl settings + $(PL) configure.pl settings cleararch + +oldlogs: oldlogs.1.pl configure.pl settings + $(PL) configure.pl settings oldlogs config.txt: config.1.txt configure.pl settings $(PL) configure.pl settings config.txt diff --git a/oldlogs.1.pl b/oldlogs.1.pl new file mode 100644 index 0000000..65e9497 --- /dev/null +++ b/oldlogs.1.pl @@ -0,0 +1,77 @@ +###PERL; + +# oldlogs is generated from oldlogs.1.pl. +# +# This script renames log files if they are big enough. +# Compresses or removes older log files. 
+ +###LOG_PATH; +###GZIP_PATH; +###LOG_SIZE_LIMIT; +###LOGS_TOTAL; +###LOGS_UNCOMPRESSED; + +if ( opendir ($dir, LOG_PATH)) { + while ($subpath = readdir $dir) { + if ($subpath !~ /\.log$/) { + next; + } + $fullpath=LOG_PATH.$subpath; + unless (-f $fullpath) { + next; + } + unless (@stat = stat($fullpath)) { + next; + } + if ($stat[7] > LOG_SIZE_LIMIT) { + movelog($fullpath,0,0); + } + + } + closedir($dir); +} + +sub movelog { + (my $path, my $number, my $gz) = @_; + my $nextgz = 0; + my $thispath; + my $nextpath; + my $nextnumber=$number+1; + my @gzip_arg = (GZIP_PATH, '-q', '-9','-f'); + + $thispath = $path.(($number != 0)?'.'.$number.($gz?'.gz':''):''); + if ($number == LOGS_TOTAL) { + if (unlink $thispath) { + return 1; + } + else { + return 0; + } + } + if ($number == LOGS_UNCOMPRESSED) { + $nextgz=1; + $nextpath = $path.'.'.$nextnumber.'.gz'; + } + else { + $nextpath = $path.'.'.$nextnumber.($gz?'.gz':''); + } + + if (-e $nextpath) { + unless (movelog($path,$nextnumber,($nextgz or $gz)?1:0)) { + return 0; + } + } + + if ($nextgz) { + push @gzip_arg, $thispath; + unless (! system (@gzip_arg)) { + return 0; + } + $thispath .= '.gz'; + } + + unless (rename ($thispath, $nextpath)) { + return 0; + } + return 1; +} diff --git a/proxy.1.c b/proxy.1.c index edf32c4..85fa4aa 100644 --- a/proxy.1.c +++ b/proxy.1.c @@ -1,4 +1,9 @@ -//The SETUID wrapper. +// proxy.c is generated from proxy.1.c +// +// This is the wrapper for proxy.pl. +// It's run with SETUID to have accesss to some files where the www server +// should not. That's why it has a C wrapper. In modern systems running scripts +// directly with SETUID is considered unsafe and not allowed. #include #include diff --git a/proxy.1.pl b/proxy.1.pl index bf53924..114c55b 100755 --- a/proxy.1.pl +++ b/proxy.1.pl @@ -1,5 +1,38 @@ ###PERL; +# proxy.pl is generated from proxy.1.pl +# +# This is the proxy software. It's launched as a CGI program by the http server. 
+# +# The proxy: +# - determines information about the http(s) request, +# - checks if user is allowed to use it, +# - forwards the requests using curl, +# - sends the response back to the user, +# - saves a redundant copy of the response. +# +# It's run with SETUID to have accesss to some files where the www server +# should not. That's why it has a C wrapper. In modern systems running scripts +# directly with SETUID is considered unsafe and not allowed. +# When running with SETUID the program also has to deal with Perl's built-in +# securuity. +# +# Any data comming from standard input, files, environment variables, command +# line is treated by Perl as unsafe. Any data created by operations on unsafe +# data is also unsafe. Perl will not allow to use such data to open files (for +# writing), launch programs, etc. +# That's why the data has to be validated and made safe, like this: +# if ($variable =~/^(some-regex)$/) { +# $variable = $1; +# } +# +# see also: +# http://perldoc.perl.org/perlsec.html#Taint-mode +# https://tools.ietf.org/html/rfc2616 +# https://tools.ietf.org/html/rfc3875 +# https://tools.ietf.org/html/rfc2396 + + use POSIX qw(strftime); ###UNLOCK_LOG; @@ -27,10 +60,12 @@ $timeout_inact = TIMEOUT_INACT*60; delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; ###PATH; +# If the special URL is used, instaed of proxying there will be proxy unlocking. if (($ENV{'HTTP_HOST'} =~ UNLOCK_PROXY_HOST) and (urldecode($ENV{'PATH_INFO'}) =~ UNLOCK_PROXY_PATH)){ unlock(); } else { + # proxy will work only if unlocked if(access()) { proxy(); } @@ -39,15 +74,23 @@ else { } } +# The proxying function sub proxy { - # srand(time()+$$); + # The required information is stored in environment variables (rfc3875). + # The HTTPS variable is not defined in rfc3875 (might be a portability + # problem?) - $archive = 0; + $archive = 0; # if this http request should be archived + # The proxy will not support request methods other than GET, POST, HEAD. + # There is no need for it. 
if ($ENV{'REQUEST_METHOD'} !~ /^(HEAD|GET|POST)$/) { return fail("Status: 405 Method Not Allowed\nAllow: GET, POST, HEAD\n","405 Method Not Allowed","The proxy does not support the $ENV{'REQUEST_METHOD'} method."); } + # The proxy may be presented with either a complete URL (when it's a HTTP + # proxy) or just the path (when it's a HTTPS proxy). + # In this case the URL has to be reconstructed. if ($ENV{'REQUEST_URI'} =~ /^[a-z]*(:[0-9]*)?:\/\/.*$/){ $URL=$ENV{'REQUEST_URI'}; } @@ -55,7 +98,10 @@ sub proxy { $URL=($ENV{'HTTPS'} eq 'on' ? 'https://' : 'http://').$ENV{'HTTP_HOST'}.$ENV{'REQUEST_URI'}; } - if (validateurl($URL)) { #validate URL against illegal characters + # The URL will now be validated. + # Additionally it will be divided in parts which will be stored in variables: + # $prot (protocol), $host, $port, $path, $query. + if (validateurl($URL)) { $URL =~ /^(.*)$/; $URL = $1; } @@ -63,35 +109,45 @@ sub proxy { return fail("Status: 400 Bad Request\n","400 Bad Request","Invalid URL: $URL."); } + # If port not defined default value is used. if ($port eq ''){ $port=($ENV{'HTTPS'} eq 'on' ? '443' : '80'); } - - if ($host =~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) {#IP instead of hostname + + # The host can be a hostname or IP. + # A request to a private IP number gives users access to the proxy's local + # network. This should not be allowed. + if ($host =~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) { if ($host =~ /^((0*10\..*)|(0*127\..*)|(0*172\.0*((1[6-9])|(2[0-9])|(3[0-1]))\..*)|(0*192\.0*168\..*))$/) { return fail("Status: 403 Forbidden\n","403 Forbidden","The proxy does not accept a private IP address $host as hostname."); } } + # Of course, if there is a web server inside the local network and is accessed + # by its hostname it can be accessible. But the server which runs the proxy + # can have some hostnames defined that point to computers in local network + # that are not available from outside. These should be blocked as well. 
if ($host =~ BLOCK_HOST) { return fail("Status: 403 Forbidden\n","403 Forbidden","The proxy does not accept $host as hostname."); } + # The proxy will block any access with a port number that is used by the + # proxy. Otherwise it would be possible to create an infinite loop in which + # the request is proxied back to the proxy again. Of course, to create such a + # loop not only the port number but also the host name is required. But is not + # always easy (or possible) to predict if a hostname will point to the proxy + # or not. Blocking port numbers is easier. + #That's why the proxy should not be installed on the default port numbers, 80 + # or 443 because then it will not work! if ($port =~ BLOCK_PORT) { return fail("Status: 403 Forbidden\n","403 Forbidden","The proxy does not accept port number $port because of infinite loop prevention."); } - # do { - # $temppath=TEMP_PATH.int(rand(100000000)).'_'; - # $headpath=$temppath.'h'; - # $downpath=$temppath.'d'; - # $uppath=$temppath.'u'; - # } while (( -e $headpath) or ( -e $downpath) or ( -e $uppath)); - - @curl_arg=(CURL_PATH,'-i','-#'); #-s - @reqhead; + @curl_arg=(CURL_PATH,'-i','-#'); # the command line arguments for curl + @reqhead; # the HTTP request header foreach $envk (keys %ENV) { + # The relevant http variables either start with HTTP_ or CONTENT_. if ($envk =~ /^(HTTP_[A-Z0-9_]+)$/) { $headname=formatheader($1); } @@ -101,12 +157,15 @@ sub proxy { else { next; } + # test against illegal characters if ($ENV{$envk} =~ /^([\x20-\x7e]*)$/) { $headval=$1; } else { next; } + + # Some header fields should not be forwarded. if(lc($headname) =~ REQUEST_HEADER_BLOCK) { next; } @@ -117,6 +176,10 @@ sub proxy { push @curl_arg, $headarg; push @reqhead, $headarg; } + + # curl uses its default values of User-agent and Accept header fields unless + # they are defined in the command line. 
+ # If the original request doesn't contain these fields this will undefine them: unless (exists $ENV{'HTTP_USER_AGENT'}) { push @curl_arg, '-H'; push @curl_arg, 'User-Agent:'; @@ -131,65 +194,45 @@ sub proxy { } if ($ENV{'REQUEST_METHOD'} eq 'POST'){ - # binmode(STDIN) or return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to switch STDIN to binary mode."); - # open($upfile,">",$uppath) or return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to create temporary file."); - # binmode ($upfile) or return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to switch temporary file to binary mode."); - - # $buffer; - # while (read (STDIN,$buffer,65536)) { - # unless (print ($upfile $buffer)) { - # return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to write to temporary file."); - # } - # } - # close ($upfile); - # $craetedup=1; - - $postdata=1; + $postdata=1; # if there is data to pe posted push @curl_arg, '--data-binary'; - # push @curl_arg, '@'.$uppath; push @curl_arg, '@-'; - } - $archpath = makepath($prot, $host, $port, $path); + # The path for archiving the HTTP request is made, the required directories + # are created. + # If this fails the request will not be archived but will still be proxied. + $archpath = makepath($prot, $host, $port, $path, $query); if ($archpath) { $archive = 1; - if ($query or $postdata) { + if ($postdata) { + # If this is a POST request the URL is not enough to identify it. To the + # path will be added a number. Also the request header and post data will + # be archived and not only the response header and response data. for (my $i = 0; ;++$i) { $respcontpath=$archpath.'@v-'.$i; $respheadpath=$archpath.'@h-'.$i; $postheadpath=$archpath.'@g-'.$i; $postcontpath=$archpath.'@u-'.$i; - $postquerpath=$archpath.'@q-'.$i; - if ((! -e $respcontpath) and (! -e $respheadpath) and (! -e $postheadpath) and (! 
-e $postcontpath) and (! -e $postquerpath)) { + if ((! -e $respcontpath) and (! -e $respheadpath) and (! -e $postheadpath) and (! -e $postcontpath)) { last; } } - if ($query) { - if (open ($postquerfile, ">", $postquerpath)) { - print $postquerfile "$query\n"; - close($postquerfile); - } - } - - if ($postdata) { - if (open ($postheadfile, ">", $postheadpath)) { - foreach $line (@reqhead) { - print $postheadfile "$line\n"; - } + # The request header is written. + if (open ($postheadfile, ">", $postheadpath)) { + foreach $line (@reqhead) { print $postheadfile "$line\n"; - close ($postheadfile); } + print $postheadfile "$line\n"; + close ($postheadfile); } + } else { $respcontpath=$archpath.'@v'; $respheadpath=$archpath.'@h'; - $postheadpath=''; - $postcontpath=''; - $postquerpath=''; } } else { @@ -198,12 +241,14 @@ sub proxy { push @curl_arg, $URL; + # The pipes for communication with curl are created. pipe ($curloutrd, $curloutwr) or return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to create stdout pipe."); pipe ($curlerrrd, $curlerrwr) or return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to create stderr pipe."); if ($postdata){ pipe ($curlinrd, $curlinwr) or return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","Failed to create stdin pipe."); } + # curl, the program that will send the forwarded request is launched here $curlpid = fork(); unless (defined $curlpid) { return fail("Status: 500 Internal Server Error\n","500 Internal Server Error","fork() failed."); @@ -212,6 +257,7 @@ sub proxy { close ($curloutrd); close ($curlerrrd); + # The input and outputs of curl are redirected to pipes. open(STDERR, ">&=" . fileno($curlerrwr)) or die "Failed to open stderr pipe.\n"; open(STDOUT, ">&=" . 
fileno($curloutwr)) or die "Failed to open stdout pipe.\n"; @@ -228,11 +274,19 @@ sub proxy { close ($curlerrwr); if($postdata) { + # If this is a POST request the post data is available on the standard + # input. It will be piped to curl and written to a file (if successfully + # opened). + # Originally this part would be in a separate thread but: + # 1. It didn't work. Not sure why. + # 2. It was not necaessary. + # $curlpidin = fork(); # unless (defined $curlpidin) { # $forceexit=1; # } # unless ($curlpidin) { + close($curlinrd); binmode($curlinwr) or die "Failed to switch stdin pipe to binary mode.\n"; binmode(STDIN) or die "Failed to switch STDIN to binary mode.\n"; @@ -259,36 +313,49 @@ sub proxy { if ($postcontopen) { close($postcontfile); } + # exit 0; # } # close ($curinwr); # close ($curlinrd); } - $safelength=1; - $definedlength=0; - $safeerror=1; - $lastskipped=1; - $forceexit=0; + $safelength=1; # if Content-length is safe to be forwarded + $definedlength=0; # if Content-length is defined + $safeerror=1; # if it's safe to send an error message. + $lastskipped=1; # if the previous header field was not forwarded + $forceexit=0; # if the proxy should exit with error if ($line = <$curloutrd>) { $line =~ s/\r?\n$//; + # The first line is the status. if ($line =~ /^HTTP\/[0-9]+\.[0-9]+ ([0-9]+ .*)$/) { $status =$1; @resphead=('Status: '.$status); + # If the status is not 2XX and this URL was already archived before it + # should not be overwritten. + # Otherwise it can happen that, for example, after refreshing the content + # previously downloaded will be overwritten with a "304 Not Modified" and + # lost. + if(($status !~ /^2[0-9][0-9]/) and (-e $respcontpath)){ $archive=0; } + # now read the header lines while ($line = <$curloutrd>) { $line =~ s/[\r\n]$//g; + # empty line (double \n) meane the end of the header. if($line eq ''){ last; } + # If line starts with space or tab, it's a continuation of the previous + # line. 
if($line =~ /^[ \t](.*)$/){ + # Continuation of a skipped line should be skipped too. if ($lastskipped) { next; } @@ -308,6 +375,9 @@ sub proxy { next; } + # If one of these header fields is present it can not be assumed that + # the received value of Content-length is also correct for the forwarded + # content. if($headname eq 'transfer-encoding') { $safelength = 0; } @@ -322,6 +392,8 @@ sub proxy { $headvalue =~ s/[\r\n \t]*$//; $contentlength = int($headvalue); } + + # Some header fields should not be forwarded. if($headname =~ RESPONSE_HEADER_BLOCK) { $lastskipped =1; next; @@ -331,12 +403,16 @@ sub proxy { push @resphead, $line; } - if ($lengthdefined and ($lengthsafe or ($contentlength==0))) { + # Only add the Content-length header if it's safe to do so. + if ($lengthdefined and ($safelength or ($contentlength==0))) { push @resphead, 'Content-length: '.$contentlength; } + # After forwarding the response header it's too late to send any error + # messages. $safeerror=0; + # Forward the header and safe it to file if possible and allowed. if ($archive) { $respheadopen = open($respheadfile, ">", $respheadpath); } @@ -352,15 +428,16 @@ sub proxy { close $respheadfile; } + # Now forward the actual content and safe it to file if possible and + # allowed. if ( ! binmode ($curloutrd)) { - # print "FAIL 1\n"; $forceexit=1; } elsif ( ! binmode (STDOUT) ) { - # print "FAIL 2\n"; $forceexit=1; } else { + # Create the file only if there was content received. No 0-byte files. $firstline=1; while (read ($curloutrd,$buffer,1024)) { unless (print (STDOUT $buffer)) { @@ -392,9 +469,11 @@ sub proxy { } } + # Ok, done. Now wait for curl to exit. (Should have already happened). waitpid($curlpid,0); if ($? != 0) { + # Generate error message if possible. if ($safeerror) { $errormsg=''; while ($line = <$curlerrrd>) { @@ -416,6 +495,7 @@ sub proxy { # } # } + # Send error message if it;s safe to do so. 
if($safeerror){ return fail("Status: 500 Internal Server Error\n","500 Internal Server Error",$errormsg); } @@ -425,31 +505,44 @@ sub proxy { } -sub access { #kind of doubles the functionality of access.pl but for http +# This function checks if the user has unlocked the proxy. Nonzero means yes. +sub access { if($ENV{'HTTP_HOST'} =~ UNLOCK_PROXY_HOST){ return 1; } + + # Check if IP if ($ENV{'REMOTE_ADDR'} =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/) { $IP = $1; + + # 127.0.0.1 is always allowed. All https requests are tunnelled through + # 127.0.0.1. if ($IP eq '127.0.0.1') { return 1; } + + # If access file doesn't exist the proxy is not unlocked for this IP. $accesspath=ACCESS_PATH.$IP; if (! (-e $accesspath)) { return 0; } elsif (open ($accessfile,"+<",$accesspath)) { + # The access file has to be locked otherwise one process may read it while + # another one updates it. unless (flock ($accessfile, 2)) { close ($accessfile); return 0; } + # First line is the timestamp of proxy unlocking. Second line is the + # timestamp of last access. $unlocktime=<$accessfile>; $lasttime=<$accessfile>; $unlocktime =~ s/[\r\n]//g; $lasttime =~ s/[\r\n]//g; + # The timestamps must be numbers. if ($unlocktime =~ /^([0-9]+)$/) { $unlocktime=int($1); } @@ -467,11 +560,14 @@ sub access { #kind of doubles the functionality of access.pl but for http } if ((abs($accesstime-$unlocktime)>$timeout_unlock) or (abs($accesstime-$lasttime)>$timeout_inact)){ + # If too much time passed proxy is not unlocked any more. Also the + # access file is now useless and will be removed. close ($accessfile); unlink $accesspath; return 0; } else { + # the proxy is unlocked. The access file will now be updated. if (seek($accessfile, 0, 0)) { print $accessfile "$unlocktime\n$accesstime\n"; truncate ($accessfile , tell($accessfile)); @@ -489,15 +585,21 @@ sub access { #kind of doubles the functionality of access.pl but for http } } +# Function to unlock the proxy. 
sub unlock { if ($ENV{'REMOTE_ADDR'} =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/) { $IP=$1; } + # If the request method is GET the CGI parameters must be taken from the + # query string (part of URL). if ($ENV{'REQUEST_METHOD'} eq 'GET') { %CGI=getcgi($ENV{'QUERY_STRING'}); } + # If request method is POST the CGI parameters must be taken from standard + # input. elsif ($ENV{'REQUEST_METHOD'} eq 'POST'){ + #Check content-type. Multipart is not supported. if ($ENV{'CONTENT_TYPE'} eq 'application/x-www-form-urlencoded'){ %CGI=getcgi( ); } @@ -509,12 +611,13 @@ sub unlock { return unlockpage("Unsupported method: $ENV{'REQUEST_METHOD'}.","Status: 405 Method Not Allowed\nAllow: GET, POST\n"); } + # If there was an IP in the form it must be used instead. if ($CGI{'ip'} =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)$/) { $IP=$1; } if ($IP eq '') { - return unlockpage("$Invalid IP.","Status: 403 Forbidden\n"); + return unlockpage("Invalid IP.","Status: 403 Forbidden\n"); } if ($CGI{'username'} eq ''){ @@ -530,21 +633,25 @@ sub unlock { } $passpath = PASS_PATH.$CGI{'username'}; - + # Open file to read user's password. open($passfile, "<", $passpath) or return unlockpage('Wrong username or password.',"Status: 403 Forbidden\n"); $pass = <$passfile>; close($passfile); $pass =~ s/[\r\n]//g; + # The password is stored in URL-encoded form. $pass = urldecode($pass); if ($pass ne $CGI{'password'}){ return unlockpage('Wrong username or password.',"Status: 403 Forbidden\n"); } + # All unlocking access must be logged: time, IP, user. The server owner must + # be able to know who did what with the proxy (in case of legal problems). open ($logfile, ">>", UNLOCK_LOG) or return unlockpage("Couldn't log your action.","Status: 500 Internal Server Error\n"); print $logfile strftime("%d.%m.%Y %H:%M:%S", gmtime($accesstime))." $IP $CGI{'username'}\n"; close($logfile); + # Ok, unlocked. Create access file. 
$accesspath=ACCESS_PATH.$IP; open ($accessfile,">",$accesspath) or return unlockpage("$accesspath","Status: 403 Forbidden\n"); print $accessfile "$accesstime\n$accesstime\n"; @@ -554,6 +661,8 @@ sub unlock { } +# The function to get CGI parameters from string. +# Format is: name=url_encoded_value&name=url_encoded_value& ... &name=url_encoded_value sub getcgi { my $arg; my $varl; @@ -568,6 +677,7 @@ sub getcgi { return %cgi; } +# Function for decoding URL-encoded text sub urldecode { my $t = $_[0]; $t =~ s/\+/ /g; @@ -575,6 +685,11 @@ sub urldecode { return $t; } +# Function for formatting header field names. +# The Environment variable name is like this: HTTP_HEADER_NAME or HEADER_NAME. +# But should be Header-Name. +# Actually the names are case-insensitive but in practise they often look like +# this. (Otherwise suspicious?) sub formatheader { my $t = $_[0]; $t =~ s/^HTTP_//; @@ -584,6 +699,9 @@ sub formatheader { return $t; } +# Function for showing the proxy unlock form page. +# arguments: 1 - additional message (optional), 2 - additional header fields +# (optional) sub unlockpage { (my $message, my $header)=@_; if($header ne ''){ @@ -616,6 +734,8 @@ sub unlockpage { print "\n"; } +# Function for showing error messages. +# Arguments: 1 - header fields, 2 - page title, 3 - error message text. sub fail { (my $header, my $title, my $message)=@_; if($header ne ''){ @@ -638,6 +758,7 @@ sub fail { print "\n"; } +# Function to show the message after unlocking the proxy. sub unlockedpage { print "Content-type: text/html\n\n"; print ''; @@ -654,6 +775,7 @@ sub unlockedpage { print "\n"; } +# Function to show the message when proxy not unlocked. sub noaccess { print "Status: 403 Forbidden\n;"; print "Content-type: text/html\n\n"; @@ -669,6 +791,7 @@ sub noaccess { print "\n"; } +# Function to show debug information. Not used any more. 
sub debag { print "Content-type: text/plain\n\n"; foreach $envk (keys %ENV) { @@ -690,6 +813,8 @@ sub debag { print 'URL: ',$URL,"\n"; } +# Function to check URL and divide in parts: protocol, hostname, port number, +# path, query string. sub validateurl { my $url = $_[0]; $prot; @@ -761,12 +886,15 @@ sub validateurl { return 1; } +# Function to convert URL to archive path. Also creates required directories. +# Returns the path on success and empty string on fail. +# Inspired by the MIRA browser. sub makepath { - #inspired by the MIRA browser! - (my $prot, my $host, my $port, my $path) = @_; + (my $prot, my $host, my $port, my $path, my $query) = @_; my $archpath = ARCH_PATH; + # First subdir: protocol @p port_number if ($prot =~ /^(https?)$/) { $archpath .= $1; } @@ -785,6 +913,18 @@ } } + # Host name: each segment between dots is a subdir, reversed order, last part + # ends with @n. + # For example www.example.com becomes + # ... /com/example/www@n/ ... + + # When segment is longer than 120 characters it's divided into 64 character + # parts ending with '-' until the last one is not longer than 120. + # For example + # www.aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz.com + # becomes + # ... /com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggg-/gggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmm-/mmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/www@n/ ... + if($host =~ /^([A-Za-z0-9\-\.]+)$/) { $host = $1; } @@ -832,13 +972,30 @@ } } + # Path: each segment between '/' is subdir. The segment after the last '/' is + # not a subdir - it is part of the file name. And it will be appended with + # some characters outside of this function: @g, @h, @u or @v. 
+ # Exception: when there is a query string the last segment is a subdir too and + # ends with @q. + # Characters that are not letters or numbers or '_' or '.' are encoded. Like + # in URL-encoding but with '@' instead of '%'. + # When segment is longer than 120 characters it's divided into 64 character + # parts ending with '-' until the last one is not longer than 120. + + # For example: + # /some/path/?a=1 becomes ... /some/path@q/ ... + # /some/other-path becomes ... /some/other@2Dpath + # /path/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/yyy + # becomes + # ... /path/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggg-/gggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmm-/mmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz/yyy + $path =~ s/^\///; while ((my $ind = index ($path, '/'))>=0) { my $part = substr $path, 0, $ind; $path= substr $path, $ind+1; - $part =~ s/[^A-Za-z0-9_\.]/sprintf ("@%02X",ord($1))/eg; + $part =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg; while (length ($part) > 120) { $archpath .= '/'.substr($part,0,64).'-'; @@ -858,7 +1015,7 @@ sub makepath { } } } - $path =~ s/[^A-Za-z0-9_\.]/sprintf ("@%02X",ord($1))/eg; + $path =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg; while (length ($path) > 120) { $archpath .= '/'.substr($path,0,64).'-'; $path = substr($path,64); @@ -869,6 +1026,46 @@ } } } + + if ($query) { + # query string: The last part is not a subdir - it is part of the file name. + # And it will be appended with some characters outside of this function: @g, + # @h, @u or @v. + # When the query string is longer than 240 characters it's divided into 128 + # character parts ending with '-' until the last one is not longer than 240. 
+ # Characters that are not letters or numbers or '_' or '.' are encoded. Like + # in URL-encoding but with '@' instead of '%'. + # Note: unlike path segments, the query string is split using the 240/128 + # limits described above, not the 120/64 limits used for path segments. + # The '?' at the beginning is not part of the query string. + # For example: + # ?a=1&b=%25%5E%26 becomes ... /a@3D1@26b@3D@2525@255E@2526 + # ?a=aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz&b=aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz + # becomes + # ... /a@3Daaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmm-/mmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz@26b@3Daaaaaaaaaabbbbb-/bbbbbccccccccccddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhhiiiiiiiiiijjjjjjjjjjkkkkkkkkkkllllllllllmmmmmmmmmmnnnnnnnnnnoooooooooopppppppppprrrrrrrrrrssssssssssttttttttttuuuuuuuuuuwwwwwwwwwwyyyyyyyyyyzzzzzzzzzz + $query =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg; + + $archpath .= '/'.$path.'@q'; + + $path = $query; + unless (-d $archpath) + { + unless (mkdir $archpath) { + return ''; + } + } + + while (length ($path) > 240) { + $archpath .= '/'.substr($path,0,128).'-'; + $path = substr($path,128); + unless (-d $archpath) + { + unless (mkdir $archpath) { + return ''; + } + } + } + } $archpath .= '/'.$path; return $archpath; } diff --git a/rewrite.1.pl b/rewrite.1.pl index 1578162..1ffe9ed 100755 --- a/rewrite.1.pl +++ b/rewrite.1.pl @@ -1,5 +1,13 @@ ###PERL; +# rewrite is generated from rewrite.1.pl +# +# 
This is the Squid helper program used for redirection. It always redirects to +# the https proxy. +# +# see also: +# http://wiki.squid-cache.org/Features/AddonHelpers#URL_Re-Writing_.28Mangling.29 + ###REWRITE_URL; $|=1; diff --git a/settings b/settings index 09502af..fb844a8 100644 --- a/settings +++ b/settings @@ -20,10 +20,11 @@ server_admin = bicyclesonthemoon@chirpingmustard.info # No matter what key you use there will be ALWAYS an unavoidable certifficate # mismatch warning. Because the proxy does an equivalent to a MITM attack. -ssl_key = /etc/apache2/ssl/botm.key -ssl_cert = /etc/apache2/ssl/botm.crt +ssl_key = /etc/apache2/ssl/proxy.key +ssl_cert = /etc/apache2/ssl/proxy.crt -#doesn't have to be a real domain +# The domain and path used for proxy unlocking +# doesn't have to be a real domain unlock_domain = yplom.bicyclesonthemoon.info unlock_path = /proxy/unlock unlock_domain_regex = ^yplom\.bicyclesonthemoon\.info(:[0-9]*)?$ @@ -37,6 +38,9 @@ block_host_regex = ^(localhost|(botcastle[0-9]*))$ timeout_unlock = 90 timeout_inact = 15 +#Time in seconds +timeout_arch = 172800 # how old files must be to safely remove them + path = /usr/local/bin:/usr/bin:/bin perl = /usr/bin/perl curl = /usr/bin/curl @@ -45,6 +49,14 @@ cp = /bin/cp mv = /bin/mv rm = /bin/rm gcc = /usr/bin/gcc +gzip = /bin/gzip c_flags = -g -Wall -rm_access_crontab = 0 0 * * * #How often to remove leftover unlock info. +log_size_limit = 65536 # How big can a log file be +logs_uncompressed = 2 # How many uncompressed old logs to keep +logs_total = 10 # How many old logs to keep + +rm_access_crontab = 0 1 * * * # How often to remove leftover unlock info. +cleararch_crontab = 30 0 * * * # How often to clear the archive from old files. +oldlogs_crontab = 0 0 * * * # How often to deal with old logs +# Not a good idea to launch oldlogs just after cleararch, I think. -- 2.30.2