bicyclesonthemoon.info Git - botm/common-perl/commitdiff
improve regexp handling in some functions v1.0.22
author b <rowerynaksiezycu@gmail.com>
Sat, 30 Sep 2023 20:14:15 +0000 (20:14 +0000)
committer b <rowerynaksiezycu@gmail.com>
Sat, 30 Sep 2023 20:14:15 +0000 (20:14 +0000)
botm_common.pm

index 82c62a626aa70c1f2aef798ee930c272916621b0..6d76275fa970d94e5a91e4824ae5f6d73d3ae542 100644 (file)
@@ -25,7 +25,7 @@ use Encode ('encode', 'decode');
 
 use Exporter;
 
-our $VERSION     = '1.0.21';
+our $VERSION     = '1.0.22';
 our @ISA         = qw(Exporter);
 our @EXPORT      = ();
 our @EXPORT_OK   = (
@@ -113,6 +113,10 @@ sub make_temp_path {
        return join_path('/', $dir, $filename);
 }
 
+###################################
+##  ENCODING + SYSTEM FUNCTIONS  ##
+###################################
+
 sub system_encoded {
        (my $cmd, my @arg) = @_;
        my @newarg;
@@ -235,8 +239,8 @@ sub read_data_file {
                        next;
                }
                
-               $line =~ s/[\n]$//g;
-               $line =~ s/[\r]$//g;
+               $line =~ s/\n$//gs;
+               $line =~ s/\r$//gs;
                
                # Empty line - end of header.
                if ($line eq ''){
@@ -244,15 +248,15 @@ sub read_data_file {
                }
                # Line starts with whitespace. It's a continuation of the previous line.
                # Concatenate the field value, separated by newline.
-               elsif($line =~ /^[ \t](.*)$/){
+               elsif($line =~ /^[ \t]/){
                        if($lastname ne '') {
-                               $data{$lastname} .= "\n".$1;
+                               $data{$lastname} .= "\n".$';
                        }
                }
                # Line starts with a name followed by colon/equal sign. Save the value
-               elsif ($line =~ /^([ -9;-<>-~]+)((:[ \t])|=)(.*)$/) {
+               elsif ($line =~ /^([ -9;-<>-~]+)((:[ \t])|=)/s) {
                        $name = lc($1);
-                       $value = $4;
+                       $value = $';
                        
                        $data{$name} = $value;
                        
@@ -317,8 +321,8 @@ sub write_data_file {
                        }
                        my $value = $data->{$ind};
                        # convert newlines - add spaces at continuation line
-                       $value =~ s/\r//g;
-                       $value =~ s/\n/\n /g;
+                       $value =~ s/(\r)?\n/\n /g;
+                       $value =~ s/\r/\n /g;
                        print $fh "$name: $value\n";
                }
        }
@@ -432,8 +436,8 @@ sub read_header_file {
        my $lastname='';
        
        while (defined(my $line = <$fh>)) {
-               $line =~ s/[\n]$//g;
-               $line =~ s/[\r]$//g;
+               $line =~ s/\n$//gs;
+               $line =~ s/\r$//gs;
                
                if ($status_line) {
                        $line =~ /^([^ ]+) +([^ ]+)( +([^ ].*))?$/;
@@ -656,9 +660,8 @@ sub split_url {
        else {
                $data{'authority'} = $url;
                if ($data{'authority'} =~ m/[\/\?#]/g) {
-                       $ind = pos($data{'authority'})-1;
-                       $url = substr($data{'authority'}, $ind); 
-                       $data{'authority'} = substr($data{'authority'}, 0, $ind);
+                       $data{'authority'} = $`;
+                       $url = $&.$';
                }
                else {
                        $url = '';
@@ -677,9 +680,8 @@ sub split_url {
        }
        $data{'host'} =~ m/\[[^\]]*\]/g;
        if ($data{'host'} =~ m/:/g) {
-               $ind = pos($data{'host'})-1;
-               $data{'port'} = substr($data{'host'}, $ind+1);
-               $data{'host'} = substr($data{'host'}, 0, $ind);
+               $data{'host'} = $`;
+               $data{'port'} = $';
        }
        else {
                $data{'port'} = '';
@@ -689,9 +691,8 @@ sub split_url {
        if (($url =~ /^\//) or $relative) {
                $data{'path'} = $url;
                if ($data{'path'} =~ m/[\?#]/g) {
-                       $ind = pos($data{'path'})-1;
-                       $url = substr($data{'path'}, $ind);
-                       $data{'path'} = substr($data{'path'}, 0, $ind);
+                       $data{'path'} = $`;
+                       $url = $&.$';
                }
                else {
                        $url = '';
@@ -703,7 +704,7 @@ sub split_url {
        
        # query
        if ($url =~ /^\?/) {
-               $data{'query'} = substr($url, 1);
+               $data{'query'} = $';
                $ind = index($data{'query'}, '#');
                if ($ind >= 0) {
                        $url = substr($data{'query'}, $ind);
@@ -719,7 +720,7 @@ sub split_url {
        
        # fragment
        if ($url =~ /^#/) {
-               $data{'fragment'} = substr($url, 1);
+               $data{'fragment'} = $';
        }
        else {
                $data{'fragment'} = '';
@@ -3304,17 +3305,17 @@ sub html_entity_decode {
                        $d .= $1;
                        $t = $2;
                }
-               elsif ($t =~ /^(\&#?[A-Za-z0-9]+;)(.*)$/s) { # correct encoded character
-                       $d .= html_entity_decode_1en($1);
-                       $t = $2;
+               elsif ($t =~ /^\&#?[A-Za-z0-9]+;/s) { # correct encoded character
+                       $d .= html_entity_decode_1en($&);
+                       $t = $';
                }
-               elsif ($t =~ /^(\&[A-Za-z0-9]+)(.*)$/s) { # encoded character without ";"
-                       $d .= html_entity_decode_1en($1);
-                       $t = $2;
+               elsif ($t =~ /^\&[A-Za-z0-9]+/s) { # encoded character without ";"
+                       $d .= html_entity_decode_1en($&);
+                       $t = $';
                }
-               elsif ($t =~ /^(\&)(.*)$/s) { # invalid "&"
-                       $d .= $1;
-                       $t = $2;
+               elsif ($t =~ /^\&/s) { # invalid "&"
+                       $d .= $&;
+                       $t = $';
                }
                else {  # nothing left to decode
                        $d .= $t;
@@ -3336,7 +3337,8 @@ sub html_entity_decode_1en {
        
        if ($en !~ /;$/) { # name without ";"
                my $n = substr($en, 1);
-               # we HAVE TO iterate :/
+               # we HAVE TO iterate, as we don't know where the name terminates :/
+               # why did they think this was a good idea?
                foreach my $name (keys %{+HTML_ENTITY_CODE_INF}) {
                        if (index($n, $name) == 0) { # name (beginning of entire string) is valid:
                                # decode