From: b Date: Sat, 30 Sep 2023 20:14:15 +0000 (+0000) Subject: improve regexp handling in some functions X-Git-Tag: v1.0.22 X-Git-Url: http://bicyclesonthemoon.info/git-projects/?a=commitdiff_plain;h=c88ca49b964a50d81875ab64ccaf2f25c64b89f3;p=botm%2Fcommon-perl improve regexp handling in some functions --- diff --git a/botm_common.pm b/botm_common.pm index 82c62a6..6d76275 100644 --- a/botm_common.pm +++ b/botm_common.pm @@ -25,7 +25,7 @@ use Encode ('encode', 'decode'); use Exporter; -our $VERSION = '1.0.21'; +our $VERSION = '1.0.22'; our @ISA = qw(Exporter); our @EXPORT = (); our @EXPORT_OK = ( @@ -113,6 +113,10 @@ sub make_temp_path { return join_path('/', $dir, $filename); } +################################### +## ENCODING + SYSTEM FUNCTIONS ## +################################### + sub system_encoded { (my $cmd, my @arg) = @_; my @newarg; @@ -235,8 +239,8 @@ sub read_data_file { next; } - $line =~ s/[\n]$//g; - $line =~ s/[\r]$//g; + $line =~ s/\n$//gs; + $line =~ s/\r$//gs; # Empty line - end of header. if ($line eq ''){ @@ -244,15 +248,15 @@ sub read_data_file { } # Line starts with whitespace. It's a continuation of the previous line. # Concatenate the field value, separated by newline. - elsif($line =~ /^[ \t](.*)$/){ + elsif($line =~ /^[ \t]/){ if($lastname ne '') { - $data{$lastname} .= "\n".$1; + $data{$lastname} .= "\n".$'; } } # Line starts with a name followed by colon/equal sign. Save the value - elsif ($line =~ /^([ -9;-<>-~]+)((:[ \t])|=)(.*)$/) { + elsif ($line =~ /^([ -9;-<>-~]+)((:[ \t])|=)/s) { $name = lc($1); - $value = $4; + $value = $'; $data{$name} = $value; @@ -317,8 +321,8 @@ sub write_data_file { } my $value = $data->{$ind}; # convert newlines - add spaces at continuation line - $value =~ s/\r//g; - $value =~ s/\n/\n /g; + $value =~ s/(\r)?\n/\n /g; + $value =~ s/\r/\n /g; print $fh "$name: $value\n"; } } @@ -432,8 +436,8 @@ sub read_header_file { my $lastname=''; while (defined(my $line = <$fh>)) { - $line =~ s/[\n]$//g; - $line =~ s/[\r]$//g; + $line =~ s/\n$//gs; + $line =~ s/\r$//gs; if ($status_line) { $line =~ /^([^ ]+) +([^ ]+)( +([^ ].*))?$/; @@ -656,9 +660,8 @@ sub split_url { else { $data{'authority'} = $url; if ($data{'authority'} =~ m/[\/\?#]/g) { - $ind = pos($data{'authority'})-1; - $url = substr($data{'authority'}, $ind); - $data{'authority'} = substr($data{'authority'}, 0, $ind); + $data{'authority'} = $`; + $url = $&.$'; } else { $url = ''; @@ -677,9 +680,8 @@ sub split_url { } $data{'host'} =~ m/\[[^\]]*\]/g; if ($data{'host'} =~ m/:/g) { - $ind = pos($data{'host'})-1; - $data{'port'} = substr($data{'host'}, $ind+1); - $data{'host'} = substr($data{'host'}, 0, $ind); + $data{'host'} = $`; + $data{'port'} = $'; } else { $data{'port'} = ''; @@ -689,9 +691,8 @@ sub split_url { if (($url =~ /^\//) or $relative) { $data{'path'} = $url; if ($data{'path'} =~ m/[\?#]/g) { - $ind = pos($data{'path'})-1; - $url = substr($data{'path'}, $ind); - $data{'path'} = substr($data{'path'}, 0, $ind); + $data{'path'} = $`; + $url = $&.$'; } else { $url = ''; @@ -703,7 +704,7 @@ sub split_url { # query if ($url =~ /^\?/) { - $data{'query'} = substr($url, 1); + $data{'query'} = $'; $ind = index($data{'query'}, '#'); if ($ind >= 0) { $url = substr($data{'query'}, $ind); @@ -719,7 +720,7 @@ sub split_url { # fragment if ($url =~ /^#/) { - $data{'fragment'} = substr($url, 1); + $data{'fragment'} = $'; } else { $data{'fragment'} = ''; @@ -3304,17 +3305,17 @@ sub html_entity_decode { $d .= $1; $t = $2; } - elsif ($t =~ /^(\&#?[A-Za-z0-9]+;)(.*)$/s) { # correct encoded character - $d .= html_entity_decode_1en($1); - $t = $2; + elsif ($t =~ /^\&#?[A-Za-z0-9]+;/s) { # correct encoded character + $d .= html_entity_decode_1en($&); + $t = $'; } - elsif ($t =~ /^(\&[A-Za-z0-9]+)(.*)$/s) { # encoded character without ";" - $d .= html_entity_decode_1en($1); - $t = $2; + elsif ($t =~ /^\&[A-Za-z0-9]+/s) { # encoded character without ";" + $d .= html_entity_decode_1en($&); + $t = $'; } - elsif ($t =~ /^(\&)(.*)$/s) { # invalid "&" - $d .= $1; - $t = $2; + elsif ($t =~ /^\&/s) { # invalid "&" + $d .= $&; + $t = $'; } else { # nothing left to decode $d .= $t; @@ -3336,7 +3337,8 @@ sub html_entity_decode_1en { if ($en !~ /;$/) { # name without ";" my $n = substr($en, 1); - # we HAVE TO iterate :/ + # we HAVE TO iterate as we don't know where name terminates :/ + # why they thought this is a good idea ? foreach my $name (keys %{+HTML_ENTITY_CODE_INF}) { if (index($n, $name) == 0) { # name (beginning of entire string) is valid: # decode