From: b Date: Wed, 16 Dec 2015 21:03:43 +0000 (+0000) Subject: added readheaderfile() to library (from facebug1) X-Git-Url: http://bicyclesonthemoon.info/git-projects/?a=commitdiff_plain;h=cd4313debd5d89090b0966ddf482204913840ca0;p=yplom%2Fproxy added readheaderfile() to library (from facebug1) improved readconfigfile() git-svn-id: svn://botcastle1b/yplom/proxy@13 05ce6ebb-7522-4a6e-a768-0026ae12be9f --- diff --git a/proxy_lib.1.pm b/proxy_lib.1.pm index 82fc153..a7dcfc4 100644 --- a/proxy_lib.1.pm +++ b/proxy_lib.1.pm @@ -271,10 +271,10 @@ use constant entitycode => { ###TIMEOUT_INACT; ###UNLOCK_PROXY_HOST; -$VERSION = 0.000003; +$VERSION = 0.000004; @ISA = qw(Exporter); @EXPORT = (); -@EXPORT_OK = qw(access divideurl entitydecode formatheader getcgi joinurl path2url url2path path2urldiv readconfigfile urldecode urldiv2path); +@EXPORT_OK = qw(access divideurl entitydecode formatheader getcgi joinurl path2url url2path path2urldiv readconfigfile readheaderfile urldecode urldiv2path); %EXPORT_TAGS = (); # This function checks if the user has unlocked the proxy. Nonzero means yes. @@ -447,20 +447,25 @@ sub urldecode { return $t; } +# Function for decoding html entities; by number decimal, haeadecimal or by name sub entitydecode { my $t = $_[0]; $t =~ s/&(#?[a-zA-Z0-9]+);/entitydecode1ch($1)/eg; return $t; } +# Function for decoding one html entity, called from entitydecode(). sub entitydecode1ch { my $t = $_[0]; + # decode decimal number if ($t =~ /^#([0-9]+)$/) { return chr($1); } + # decode hexadecimal number elsif ($t =~ /^#x([0-9A-Fa-f]+)$/) { return chr(hex($1)); } + # decode entity name elsif ($t =~ /^([A-Za-z0-9]+)$/) { return entitycode->{$1}; } @@ -499,8 +504,8 @@ sub formatheader { return $t; } -# Function to convert URL to archive path. Also creates required directories -# if $mkdir true. +# Function to convert URL (divided) to archive path. Also creates required +# directories if $mkdir true. # Returns the path on success and undef on fail. # Inspired by the MIRA browser. sub urldiv2path { @@ -655,8 +660,6 @@ sub urldiv2path { # character parts ending with '-' until the last one is not longer than 240. # Characters that are not letters or numbers or '_' or '.' are encoded. Like # in URL-encoding but with '@' instead of '%'. - # When segment is longer than 120 characters it's divided into 64 character - # parts ending with '-' until the last one is not longer than 120. # The '?' at the beginning is not part of the query string. # For example: # ?a=1&b=%25%5E%26 becomes ... /a@3D1@26b@3D@2525@255E@2526 @@ -690,10 +693,12 @@ sub urldiv2path { return $archpath; } +# Function to convert URL (one string) to archive path sub url2path { return urldiv2path(divideurl($_[0])); } +# Function to convert archive path to URL (divided) sub path2urldiv { my $archpath = $_[0]; my $prot; @@ -759,6 +764,7 @@ sub path2urldiv { return ($prot, $host, $port, $path, $query); } +# Function to join parts of URL to one string sub joinurl { (my $prot, my $host, my $port, my $path, my $query) = @_; my $url; @@ -807,26 +813,130 @@ sub joinurl { return $url; } +# Function to convert archive path to URL (one string) sub path2url { return joinurl(path2urldiv($_[0])); } +# Function to read http headers. It reads data from a file containing a http +# header (see: https://tools.ietf.org/html/rfc2616#section-4.2). One exception: +# the status line is not special. Instead the status is just another header +# field with the name 'Status' (like in a CGI header) +# +# Returns a hash containing the values. +# Names are case sensitive and are converted to lowercase +# +# Argument can be a path or a file handle. In case of a file handle it will just +# read the file. In case of path it opens the file before reading and closes +# after. On failure (file not open) returns empty hash. +# +sub readheaderfile { + (my $headerpath) = @_; + my $headerfile; + my %header; + + # check if $headerpath is actually a path or maybe a filehandle + # filehandles are references. + if(ref($headerpath)) { + $headerfile=$headerpath; + } + else { + unless (open ($headerfile, "<", $headerpath)) { + return %header; + } + } + + # The name of header field in previous line. Required for header fields that + # occupy multiple lines. + my $lastname=''; + + while (defined(my $line = <$headerfile>)) { + $line =~ s/[\r\n]$//g; + my $headname=''; + my $headval=''; + + # Line starts with whitespace. It's a continuation of the previous line. + # Concatenate the field value. + if($line =~ /^[ \t]+([^ \t](.*[^ \t])?)[ \t]*$/){ + if($lastname ne '') { + $header{$lastname}.=$1; + } + } + # Line starts with a name followed by colon. + elsif ($line =~ /^([^:]+):[ \t]*([^ \t](.*[^ \t])?)[ \t]*$/) { + $headname = lc($1); + $headval = $2; + # If there already was a header field with the same name then the value + # is concantenated, separated by comma. + if ($header{$headname} ne '') { + $header{$headname}.=', '.$headval; + } + # otherwise just save the value + else { + $header{$headname}=$headval; + } + $lastname = $headname; + } + } + + # If argument was a path the file must be closed. + unless (ref($headerpath)) { + close ($headerfile); + } + + return %header; +} + +# Function to read configuration files. +# In each line the '#' and everything after it is treated as a comment and +# ignored. +# The format is: +# name = value +# There can be whitespaces (space, tab) before after or between the name, value +# and '='. Leading and trailing whitespaces are not part of the value or name. +# Name can be made of letters, numbers, '_', '-', and '.'. Names are case +# sensitive. +# +# In some cases it may not possible to save the required data in this +# format. There may be a need to change this format in the future. +# +# Returns a hash containing the values. +# +# Argument can be a path or a file handle. In case of a file handle it will just +# read the file. In case of path it opens the file before reading and closes +# after. On failure (file not open) returns empty hash. +# +# This function is duplicated in configure.pl. configure.pl can't use a function +# from a file that it generates. That would create a dependency loop. +# sub readconfigfile { (my $configpath) = @_; my $configfile; my %config; - if (open ($configfile, "<", $configpath)) { - while (defined(my $line = <$configfile>)) { - $line =~ s/[\r\n]//g; - $line =~ s/#.*$//; - if ($line =~ /^ *([a-zA-Z0-9_\-\.]+) *= *(.*)$/){ - my $name=$1; - my $value=$2; - $value =~ s/ *$//; - $config{$name}=$value; - } + # check if $configpath is actually a path or maybe a filehandle + # filehandles are references. + if(ref($configpath)) { + $configfile=$configpath; + } + else { + unless (open ($configfile, "<", $configpath)) { + return %config; } + } + + while (defined(my $line = <$configfile>)) { + $line =~ s/[\r\n]//g; + $line =~ s/#.*$//; #comment + if ($line =~ /^[ \t]*([a-zA-Z0-9_\-\.]+)[ \t]*=[ \t]*([^ \t](.*[^ \t])?)[ \t]*$/){ + my $name=$1; + my $value=$2; + $config{$name}=$value; + } + } + + # If argument was a path the file must be closed. + unless (ref($configpath)) { close ($configfile); } return %config;