From: b Date: Tue, 20 Feb 2024 00:22:43 +0000 (+0000) Subject: optimised pattern replacement to work much faster X-Git-Tag: v1.2.3 X-Git-Url: http://bicyclesonthemoon.info/git-projects/?a=commitdiff_plain;h=7154627e28ef027582050f54ea47e006ec8fad50;p=botm%2Fconfig optimised pattern replacement to work much faster --- diff --git a/configure.1.pl b/configure.1.pl index f9d4618..676a4cb 100755 --- a/configure.1.pl +++ b/configure.1.pl @@ -80,6 +80,7 @@ # * --d, --debug - will print debug information to standard error # * --da, --debugall, --debug_all - as above but will also include debug # information produced after finished processing the config files +# (at this moment equivalent to --debug but left for compatibility) # * --l, --list - will print the final list of all settings to standard # error # * --la, --listall, --list_all - as above but will include settings not @@ -298,6 +299,9 @@ # just the pattern. Can be overwritten. Default value '###$0;' # $0 - the name of the setting # More functions and values can be added in future versions. +# $_REPLACE_LINE and $_REPLACE_KEYWORD are NOT evaluated the same way +# as normal patterns. Instead they are converted to regexp patterns where +# $0 is replaced with the keyword regexp. # # The settings can be conditional thanks to using statements like: # if @@ -429,17 +433,18 @@ my $list = 0; my $list_all = 0; my $file_type = ''; my $verbose = 0; - -$cfg{REPLACE_LINE()} = DEFAULT_REPLACE_LINE; -$cfg{REPLACE_KEYWORD()} = DEFAULT_REPLACE_KEYWORD; -$cfg{PATH_SEPARATOR()} = DEFAULT_PATH_SEPARATOR; - +my $replace_line; +my $replace_keyword; my $encoding = ''; my $encoding_file = ''; my $encoding_configfile = ''; my $encoding_stdin; my $encoding_stdout; +$cfg{REPLACE_LINE()} = DEFAULT_REPLACE_LINE; +$cfg{REPLACE_KEYWORD()} = DEFAULT_REPLACE_KEYWORD; +$cfg{PATH_SEPARATOR()} = DEFAULT_PATH_SEPARATOR; + decode_argv(); foreach my $arg (@ARGV) { @@ -520,31 +525,34 @@ binmode STDERR, ":encoding($encoding_stdout)"; foreach my $file (@config_files) { %cfg = parse_file($file, $encoding_configfile, 0, %cfg); } + +$replace_line = make_pattern_regexp($cfg{REPLACE_LINE() }); +$replace_keyword = make_pattern_regexp($cfg{REPLACE_KEYWORD()}); + +print_debug(0, "REPLACE_LINE REGEXP $replace_line"); +print_debug(0, "REPLACE_KEYWORD REGEXP $replace_keyword"); + unless ($debug_all) { $debug_enabled = 0; } -my %replace_line; -my %replace_keyword; -my $debug = ''; -my $debug_all = ''; -my $match_line; -my $match_keyword; - foreach my $key (keys %cfg) { - my $print_this = $list_all; - unless((substr($key,0,1) eq '_') or ($key =~ /^[0-9]+$/)){ - $print_this = ($list or $list_all); - $match_line = parse_pattern(REPLACE_LINE(), "($key)", 0, %cfg); - $match_keyword = parse_pattern(REPLACE_KEYWORD(), "($key)", 0, %cfg); - $replace_line{$match_line} = $cfg{$key}; - $replace_keyword{$match_keyword} = $cfg{$key}; + if ( + (substr($key,0,1) eq '_') or + ($key =~ /^[0-9]+$/) + ) { + if ($list_all) { + print STDERR format_cfg($key, $cfg{$key}); + } + delete %cfg{$key}; + next; } - if ($print_this) { + if ($list or $list_all) { print STDERR format_cfg($key, $cfg{$key}); } } + if (@input_files == 0) { push @input_files, ''; } @@ -577,28 +585,59 @@ sub replace { return (1, $before.$replacement.$after); } -# replace but with mask to avoid duplicated replacement -sub replace_masked { - (my $text, my $mask, my $to_replace, my $replacement) = @_; +# # replace but with mask to avoid duplicated replacement +# sub replace_masked { + # (my $text, my $mask, my $to_replace, my $replacement) = @_; - my $ind = index($mask, $to_replace); - if ($ind <0) { - return (0, $text, $mask); - } - my $len = length($to_replace); + # my $ind = index($mask, $to_replace); + # if ($ind <0) { + # return (0, $text, $mask); + # } + # my $len = length($to_replace); - my $text_before = substr($text, 0, $ind); - my $mask_before = substr($mask, 0, $ind); - my $mask_insert = "\r" x length($replacement); # CR can never appear in $text - my $text_after = substr($text, $ind+$len); - my $mask_after = substr($mask, $ind+$len); + # my $text_before = substr($text, 0, $ind); + # my $mask_before = substr($mask, 0, $ind); + # my $mask_insert = "\r" x length($replacement); # CR can never appear in $text + # my $text_after = substr($text, $ind+$len); + # my $mask_after = substr($mask, $ind+$len); - print_verbose(0, 1, "$to_replace -> $replacement"); + # print_verbose(0, 1, "$to_replace -> $replacement"); - $text = $text_before.$replacement.$text_after; - $mask = $mask_before.$mask_insert.$mask_after; + # $text = $text_before.$replacement.$text_after; + # $mask = $mask_before.$mask_insert.$mask_after; - return (1, $text, $mask); + # return (1, $text, $mask); +# } + +# do the pattern replacement +sub apply_pattern { + (my $name, my $pattern, my $fallback) = @_; + if (defined($cfg{$name})) { + print_verbose(0, 1, "$pattern -> $cfg{$name}"); + return $cfg{$name}; + } + else { + return $fallback; + } +} + +# make the regexp for replacement patterns +sub make_pattern_regexp { + (my $pattern) = @_; + + my $ind = index($pattern, '$0'); + if ($ind <0) { + return ''; + } + my $before = substr($pattern, 0, $ind); + my $after = substr($pattern, $ind+2); + + $before =~ s/[\\\^\.\$\|\(\)\[\]\*\+\?\{\}\-\#]/\\$&/gs; + $after =~ s/[\\\^\.\$\|\(\)\[\]\*\+\?\{\}\-\#]/\\$&/gs; + + $pattern = $before . '([A-Za-z0-9_\\-\\.]+)' . $after; + $pattern = qr/$pattern/; + return $pattern; } sub print_debug { @@ -1134,7 +1173,7 @@ sub parse_file { sub convert_file { (my $in, my $out, my $encoding) = @_; - print_verbose(0, 0, $in.' -> '.$out); + print_verbose(0, 0, "$in. -> .$out"); my $ref_in = ref($in); my $ref_out = ref($out); @@ -1171,21 +1210,17 @@ sub convert_file { # TODO: This is extremely inefficient. and does not scale well. Improve! my $status; LINE: while (defined(my $line = <$in>)) { - $line =~ s/[\r\n]//g; - foreach my $key (keys %replace_line) { - if (index($line, $key) >= 0) { - my $replace = $replace_line{$key}; - print_verbose(0, 1, "$key -> $replace"); - print $out "$replace\n"; + + while ($line =~ /$replace_line/gs) { + my $new_line = apply_pattern($1, $&, undef); + if (defined($new_line)) { + print $out "$new_line\n"; next LINE; } } - my $mask = $line; - foreach my $key (keys %replace_keyword) { - do { - ($status, $line, $mask) = replace_masked($line, $mask, $key, $replace_keyword{$key}); - } while ($status); - } + $line =~ s/[\r\n]//g; + $line =~ s/$replace_keyword/apply_pattern($1,$&,$&)/gse; + print $out "$line\n"; }