From: b <b@05ce6ebb-7522-4a6e-a768-0026ae12be9f>
Date: Wed, 16 Dec 2015 21:03:43 +0000 (+0000)
Subject: added readheaderfile() to library (from facebug1)
X-Git-Url: http://bicyclesonthemoon.info/git-projects/?a=commitdiff_plain;h=cd4313debd5d89090b0966ddf482204913840ca0;p=yplom%2Fproxy

added readheaderfile() to library (from facebug1)
improved readconfigfile()


git-svn-id: svn://botcastle1b/yplom/proxy@13 05ce6ebb-7522-4a6e-a768-0026ae12be9f
---

diff --git a/proxy_lib.1.pm b/proxy_lib.1.pm
index 82fc153..a7dcfc4 100644
--- a/proxy_lib.1.pm
+++ b/proxy_lib.1.pm
@@ -271,10 +271,10 @@ use constant entitycode => {
 ###TIMEOUT_INACT;
 ###UNLOCK_PROXY_HOST;
 
-$VERSION     = 0.000003;
+$VERSION     = 0.000004;
 @ISA         = qw(Exporter);
 @EXPORT      = ();
-@EXPORT_OK   = qw(access divideurl entitydecode formatheader getcgi joinurl path2url url2path path2urldiv readconfigfile urldecode urldiv2path);
+@EXPORT_OK   = qw(access divideurl entitydecode formatheader getcgi joinurl path2url url2path path2urldiv readconfigfile readheaderfile urldecode urldiv2path);
 %EXPORT_TAGS = ();
 
 # This function checks if the user has unlocked the proxy. Nonzero means yes.
@@ -447,20 +447,25 @@ sub urldecode {
 	return $t;
 }
 
+# Function for decoding html entities; by number decimal, haeadecimal or by name
 sub entitydecode {
 	my $t = $_[0];
 	$t =~ s/&(#?[a-zA-Z0-9]+);/entitydecode1ch($1)/eg;
 	return $t;
 }
 
+# Function for decoding one html entity, called from entitydecode().
 sub entitydecode1ch {
 	my $t = $_[0];
+	# decode decimal number
 	if ($t =~ /^#([0-9]+)$/) {
 		return chr($1);
 	}
+	# decode hexadecimal number
 	elsif ($t =~ /^#x([0-9A-Fa-f]+)$/) {
 		return chr(hex($1));
 	}
+	# decode entity name
 	elsif ($t =~ /^([A-Za-z0-9]+)$/) {
 		return entitycode->{$1};
 	}
@@ -499,8 +504,8 @@ sub formatheader {
 	return $t;
 }
 
-# Function to convert URL to archive path. Also creates required directories
-# if $mkdir true.
+# Function to convert URL (divided) to archive path. Also creates required
+# directories if $mkdir true.
 # Returns the path on success and undef on fail.
 # Inspired by the MIRA browser.
 sub urldiv2path {
@@ -655,8 +660,6 @@ sub urldiv2path {
 		# character parts ending with '-' until the last one is not longer than 240.
 		# Characters that are not letters or numbers or '_' or '.' are encoded. Like
 		# in URL-encoding but with '@' instead of '%'.
-		# When segment is longer than 120 characters it's divided into 64 character
-		# parts ending with '-' until the last one is not longer than 120.
 		# The '?' at the beginning is not part of the query string.
 		# For example:
 		# ?a=1&b=%25%5E%26 becomes ... /a@3D1@26b@3D@2525@255E@2526
@@ -690,10 +693,12 @@ sub urldiv2path {
 	return $archpath;
 }
 
+# Function to convert URL (one string) to archive path
 sub url2path {
 	return urldiv2path(divideurl($_[0]));
 }
 
+# Function to convert archive path to URL (divided)
 sub path2urldiv {
 	my $archpath = $_[0];
 	my $prot;
@@ -759,6 +764,7 @@ sub path2urldiv {
 	return ($prot, $host, $port, $path, $query);
 }
 
+# Function to join parts of URL to one string
 sub joinurl {
 	(my $prot, my $host, my $port, my $path, my $query) = @_;
 	my $url;
@@ -807,26 +813,130 @@ sub joinurl {
 	return $url;
 }
 
+# Function to convert archive path to URL (one string)
 sub path2url {
 	return joinurl(path2urldiv($_[0]));
 }
 
+# Function to read http headers. It reads data from a file containing a http
+# header (see: https://tools.ietf.org/html/rfc2616#section-4.2). One exception:
+# the status line is not special. Instead the status is just another header
+# field with the name 'Status' (like in a CGI header)
+# 
+# Returns a hash containing the values.
+# Names are case sensitive and are converted to lowercase
+#
+# Argument can be a path or a file handle. In case of a file handle it will just
+# read the file. In case of path it opens the file before reading and closes
+# after. On failure (file not open) returns empty hash.
+# 
+sub readheaderfile {
+	(my $headerpath) = @_;
+	my $headerfile;
+	my %header;
+	
+	# check if $headerpath is actually a path or maybe a filehandle
+	# filehandles are references.
+	if(ref($headerpath)) {
+		$headerfile=$headerpath;
+	}
+	else {
+		unless (open ($headerfile, "<", $headerpath)) {
+			return %header;
+		}
+	}
+	
+	# The name of header field in previous line. Required for header fields that
+	# occupy multiple lines.
+	my $lastname='';
+	
+	while (defined(my $line = <$headerfile>)) {
+		$line =~ s/[\r\n]$//g;
+		my $headname='';
+		my $headval='';
+		
+		# Line starts with whitespace. It's a continuation of the previous line.
+		# Concatenate the field value.
+		if($line =~ /^[ \t]+([^ \t](.*[^ \t])?)[ \t]*$/){
+			if($lastname ne '') {
+				$header{$lastname}.=$1;
+			}
+		}
+		# Line starts with a name followed by colon.
+		elsif ($line =~ /^([^:]+):[ \t]*([^ \t](.*[^ \t])?)[ \t]*$/) {
+			$headname = lc($1);
+			$headval = $2;
+			# If there already was a header field with the same name then the value
+			# is concantenated, separated by comma.
+			if ($header{$headname} ne '') {
+				$header{$headname}.=', '.$headval;
+			}
+			# otherwise just save the value
+			else {
+				$header{$headname}=$headval;
+			}
+			$lastname = $headname;
+		}
+	}
+	
+	# If argument was a path the file must be closed. 
+	unless (ref($headerpath)) {
+		close ($headerfile);
+	}
+	
+	return %header;
+}
+
+# Function to read configuration files.
+# In each line the '#' and everything after it is treated as a comment and
+# ignored.
+# The format is:
+#  name = value
+# There can be whitespaces (space, tab) before after or between the name, value
+# and '='. Leading and trailing whitespaces are not part of the value or name.
+# Name can be made of letters, numbers, '_', '-', and '.'. Names are case
+# sensitive.
+#   
+# In some cases it may not possible to save the required data in this
+# format. There may be a need to change this format in the future.
+# 
+# Returns a hash containing the values.
+# 
+# Argument can be a path or a file handle. In case of a file handle it will just
+# read the file. In case of path it opens the file before reading and closes
+# after. On failure (file not open) returns empty hash.
+# 
+# This function is duplicated in configure.pl. configure.pl can't use a function
+# from a file that it generates. That would create a dependency loop.
+# 
 sub readconfigfile {
 	(my $configpath) = @_;
 	my $configfile;
 	my %config;
 	
-	if (open ($configfile, "<", $configpath)) {
-		while (defined(my $line = <$configfile>)) {
-			$line =~ s/[\r\n]//g;
-			$line =~ s/#.*$//;
-			if ($line =~ /^ *([a-zA-Z0-9_\-\.]+) *= *(.*)$/){
-				my $name=$1;
-				my $value=$2;
-				$value =~ s/ *$//;
-				$config{$name}=$value;
-			}
+	# check if $configpath is actually a path or maybe a filehandle
+	# filehandles are references.
+	if(ref($configpath)) {
+		$configfile=$configpath;
+	}
+	else {
+		unless (open ($configfile, "<", $configpath)) {
+			return %config;
 		}
+	}
+	
+	while (defined(my $line = <$configfile>)) {
+		$line =~ s/[\r\n]//g;
+		$line =~ s/#.*$//; #comment
+		if ($line =~ /^[ \t]*([a-zA-Z0-9_\-\.]+)[ \t]*=[ \t]*([^ \t](.*[^ \t])?)[ \t]*$/){
+			my $name=$1;
+			my $value=$2;
+			$config{$name}=$value;
+		}
+	}
+	
+	# If argument was a path the file must be closed. 
+	unless (ref($configpath)) {
 		close ($configfile);
 	}
 	return %config;