1 ###RUN_PERL: #!/usr/bin/perl
4 # The new BOTM configuration tool
6 # Copyright (C) 2022, 2023, 2024 Balthasar SzczepaĆski
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU Affero General Public License as
10 # published by the Free Software Foundation, either version 3 of the
11 # License, or (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU Affero General Public License for more details.
18 # You should have received a copy of the GNU Affero General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 # This is a tool for managing configuration in a project.
22 # It was initially based on my configuration script from the BSTA software
23 # but then I have rewriten it from beginning with new design goals.
26 # - tool to read confinguration files and then apply changes in
27 # project's source files
28 # - configure the project at compile time, easy cooperation with makefile
29 # - possible to keep separately:
30 # - the actual important settins
31 # - the settings derived from them
32 # - the configuration tool
33 # - the files to configure
37 # 1. You write a program. You are silently making assumptions. Whenever
38 # you need a path or other dependency, you define ("hard-code") it there.
39 # Acceptable for a simple, short, single-use program maybe.
40 # But terrible for anything more serious. Maintenance and portability
41 # becomes difficult. When you want to change anything you have to look
42 # through the whole code to find all places where something is defined.
43 # A lot of effort and easy to make a mistake.
45 # 2. Instead of hard-coding everything everywhere you can define the assumed
46 # settings at the begnning of each file using 'const' or '#define' or
47 # whatever the programming language allows. Even better if you can put the
48 # definitions into a single file and include them where needed.
49 # Much better but still for some big projects with many files it takes some
50 # time and effort to go through all of them. Even if all you wanted was
51 # to adapt the project to run on a different system for example.
53 # 3. Ok, you could have all definitions in one place and then apply them
54 # to all the files. One way to do it is to make a script which will read
55 # the settings from a file and then insert '#define' (C), 'use constant'
56 # (Perl), or other statements into the files based on pattern matching.
58 # This is for configuration done at compile time. Not at run time.
59 # There the program itself has to read the settings and make decisions.
61 # There also exist things like autoconf, automake, etc. see:
62 # http://www.mrob.com/pub/comp/unix-building-history.html
63 # However for many project I'm currently dealing with, this script
64 # here is totally enough and so far I did not have to learn these tools.
65 # They are still a black box (or even black magic) to me.
69 # perl configure.pl configfile1 configfile2 ... < input_file > output_file
71 # perl configure.pl --option1 ... name=value ... configfile1 configfile2 ... --input inputfile1 inputfile2 ... --output outputfile1 outputfile2
73 # Or './configure.pl' instead of 'perl configure.pl'.
75 # There are 3 types of commandline parameters:
76 # * in form 'name=value' where 'name' is a valid setting name (described
77 # further below): this setting will be loaded from commandline directly
78 # (and literally - no processing) before reading any configuration files
79 # * starting from '--': these are options:
80 # * --d, --debug - will print debug information to standard error
81 # * --da, --debugall, --debug_all - as above but will also include debug
82 # information produced after finished processing the config files
83 # (at this moment equivalent to --debug but left for compatibility)
84 # * --l, --list - will print the final list of all settings to standard
86 # * --la, --listall, --list_all - as above but will include settings not
87 # available for applying in files (starting with '_')
88 # * --v, --vv - will print information about processed files and patterns
89 # to standard error. Number of 'v's is the verbose level.
90 # * --do, --debug_stdout - all debug information will go to standard output
91 # * --de, --debug_stderr - all debug information will go to standard error
92 # * --i, --in, --input - all further file paths are for input files
93 # * --o, --out, --output - all further file paths are for output files
94 # * -- - all further file paths are for configuration files
95 # * --e=, --encoding= - sets the character encoding for input, output,
97 # * --ef=, --encoding-file= - sets character encoding for files only,
99 # * --ecf, --ecnoding-configfile= - sets character encoding for configuration
100 # files only, overrides --ef
101 # * anything else will be treated as a path to a file: input, output,
102 # or configuration. By default configuration files are expected.
104 # Unless defined otherwise, the character encoding for all files is UTF-8,
105 # and for standard input/output it's the encoding determined from locale.
106 # for parsing command line arguments and for opening files always
107 # system locale is used, this can not be overridded.
109 # The script reads configuration from all configuration files given in the
110 # command line parameters. The effect is similar to reading a single
113 # Configuration files are read once and in the same order as the command line
116 # Settings provided directly in command line are read before the files. So the
117 # files can override such settings.
119 # Next, input files are read, converted, and saved to output files.
121 # The input and output files are procesed in the same order as the command line
122 # parameters. First input file is converted to first output file and so on.
123 # If there are more output files than input files, then the extra output
124 # files will not be written.
125 # If there are less output files than input files, then instead of the missing
126 # files the standard output is used.
127 # If there are no input files, then instead the standard input is used (as if
130 # It reads input line by line.
131 # In each line it looks for replacement patterns, replaces them if found,
132 # and prints the line to the output.
134 # Whenever I write 'whitespace' I mean it can be space ' ' or TAB '\t'.
136 # It is possible to include files like this:
137 # include path_to_include
138 # The include statement must start from the first column in the line.
139 # It is equivalent to replacing the line (with the include statement)
140 # by the content of the included file
142 # settings can be defined in two ways:
145 # These must start from the first column in the line.
146 # If a line starts with a whitespace it is treated as a continuation
152 # The name consists of the characters A-Z, a-z, 0-9, '-', '_', and '.'.
154 # In the first format (name: value):
155 # - Any amount (including 0) of whitespace is allowed before the ':'.
156 # - The value starts immediately after the first whitespace after the ':'
157 # and is taken directly without any processing.
159 # In the second format (name = value) some processing is performed:
160 # - Any amount (including 0) of whitespace is allowed before and
162 # - Comments starting from '#' are removed.
163 # - whitespace at beginning and end of each line are removed
164 # - escaped characters (by '\') are processed.
165 # - other settings inserted by $name are expanded.
167 # It is possible to insert one setting's value into another one like this:
170 # In this example the value of s2 is '123 456 789'.
171 # This will not work:
174 # When processing s2 $s1 is not defined yet so the value of s2 is:
177 # It is possible to use a value as a function pattern:
180 # In this example the value of s2 is: 'a = "b"'.
181 # Here names made entirely from digits are used as the parameters of the
182 # function pattern and should not be used for other purposes. Possible but
183 # not recommended. another special name is $_, which joins all parameters
186 # If not provided, the parameters will be treated as empty string.
190 # the value of s2 is: 'a = ""'.
192 # Even though $0 and $1 were not processed when s1 was defined, they
193 # were processed when s1 was used as a function pattern.
198 # s1 will be immediately evaluated to ' = ""', and s2 will also become
201 # Escaping will protect $0 and $2 to be processed too early:
204 # Here the value of s1 is '$0 = "$1"' again.
206 # The pattern function calls can be nested:
210 # In this example this is how it will be processed:
211 # processing s3, '@s2(@s1(1))'
212 # found call to s2 with parameters '(@s1(1))'
213 # processing s2, '<$0>', '(@s1(1))'
214 # processing parameters '(@s1(1))'
215 # found parameter '@s1(1)'
216 # processing '@s1(1)'
217 # found call to s1 with parameters '(1)'
218 # processing s1, '>$0<', '(1)'
219 # processing parameters '(1)'
220 # found parameter '1'
225 # found call to $0 with no parameters
226 # replace $0 with '1'
229 # replace $s1() with '>1<'
234 # found call to $0 with no parameters
235 # replace $0 with '>1<'
236 # value is now '<>1<>'
238 # replace $2() with '<>1<>'
239 # value is now '<>1<>'
242 # So when processing a value, if a pattern function call is found,
243 # then first, the parameters are separated, and each is processed one
244 # recursion level deeper.
245 # then the pattern is processed (also one recursion level deeper than the
246 # base value) and the $0, etc. are replaced by the (already processed)
249 # The parameter list is used for '@name' and '$name' even if '$' doesn't
250 # take any parameters.
251 # Empty parameter list can be useful for separating from text afterwards:
254 # Here the value of s2 is: '123456789'.
258 # Here the value of s2 is: '123' because $s1789 was not found.
260 # Escaping the '(' prevents interpreting it as some parameters:
262 # s2 = (123)$s1\(789)
263 # Here the value of s2 is: '(123)456(789)'.
267 # Here the value of s2 is: '(123)456' because '789' was given to s1
270 # Function patterns can also be given as parameters.
275 # Here the value of s3 is '(a)_(b)'.
277 # There are special functions and values:
278 # @_ESCAPE is a function which will escape some characters with '\'.
279 # $0 - the text to be escaped
280 # $1 - the regexp which specifies which characters should be escaped
281 # if not specified then a default regexp is used
282 # @_URL_ENCODE is a function which will perform percent-encoding
283 # $0 - the text to be encoded
284 # $1 - the regexp which specifies which characters should be encoded
285 # if not specified then a default regexp is used
286 # @_HT_ENCODE is a function which will perform HTML entity encoding
287 # $0 - the text to be encoded
288 # $1 - the regexp which specifies which characters should be encoded
289 # if not specified then a default regexp is used
290 # @_SPRINTF is a call to the Perl sprintf() function
291 # $0 - the format string
292 # $1 and continuing - the parameters to sprintf()
293 # @_PATH is a function which will join a path from segments
294 # $0 and continuing - the segments to be joined
295 # $_PATH_SEPATATOR is the path separator used by $_PATH.
296 # can be overwritten, default value '/'.
297 # $_REPLACE_LINE is the pattern to look for in files, to replace the
298 # entire line. Can be overwritten. Default value '###$0:'
299 # $0 - the name of the setting
300 # $_REPLACE_KEYWORD is the pattern to look for in files, to replace
301 # just the pattern. Can be overwritten. Default value '###$0;'
302 # $0 - the name of the setting
303 # More functions and values can be added in future versions.
304 # $_REPLACE_LINE and $_REPLACE_KEYWORD are NOT evaluated the same way
305 # as normal patterns. Instead they are converted to regexp patterns where
306 # $0 is replaced with the keyword regexp.
308 # The settings can be conditional thanks to using statements like:
313 # Anything after the "if" keyword is the codition to check.
314 # Condition is considered true if its value is not an empty string
315 # Condition is parsed the same way as a dynamic setting definition
317 # Conditional statements must start at from first column in the line.
318 # Conditional statements can be nested
330 # Here the value of s3 is '1'.
333 # Inserting settings to source files:
335 # After all settings are read, the actual source is processed line by line.
336 # In each line the tool will look for patterns to replace the entire line.
337 # The patterns are built by inserting settings names to $REPLACE_LINE.
339 # REPLACE_LINE: ###$0:
341 # then each line containing '###s1:' will be replaced by 'ABC', and so on.
342 # The patterns are checked in unpredictable order (because Perl hash).
343 # If any match is found, the line is replaced and the tool moves to next line.
344 # If none of the line patterns are matched then comes next step:
345 # The tool will now look for patterns to replace in the line.
346 # The patterns are built by inserting settings names to $REPLACE_KEYWORD.
348 # REPLACE_KEYWORD: ###$0;
350 # then each occurence of '###s3:' will be replaced by 'XYZ', and so on.
351 # The patterns are checked in unpredictable order (because Perl hash)
355 # Either install the tool somewhere on the system and multiple projects
356 # can call it, or copy the tool as part of the project
358 # See the makefile of the tool's project as a simple example.
360 # Of course if you found this tool as part of a different project then
361 # you probably will not see the original makefile.
362 # Instead maybe check how the tool is used in the project.
366 # It's a good idea to use the tool together with the makefile, to compile
369 # Generate source files and then use them:
374 # abc.c: abc.1.c settings.txt configure.pl
375 # perl configure.pl settings.txt < abd.1.c > abc.c
377 # Of course not only the source files but also the makefile itself can be
380 # makefile: makefile.1.mak settings.txt configure.pl
381 # perl configure.pl settings.txt < makefile.1.mak > makefile
383 # If the settings have changed then any attept to make something will
384 # first regenerate the makefile and then use it.
386 # Use different settings for different targets:
392 # SETTINGS = settings-$(TARGET).txt settings.txt
394 # makefile: makefile.1.mak $(SETTINGS) configure.pl
395 # perl configure.pl $(SETTINGS) < makefile.1.mak > makefile
398 # make TARGET=targetname something_to_make
400 # Or even replace the default target by a configurable pattern.
401 # then, to set a new target use:
402 # make TARGET=targetname -B makefile
403 # And afterwards the new tatget will be the default one.
407 use Encode::Locale ('decode_argv');
408 use Encode ('encode', 'decode');
410 use constant MAX_DEPTH => 256;
411 use constant KEYWORD_PATTERN => '[A-Za-z0-9_\-\.]+';
412 use constant DEFAULT_PATH_SEPARATOR => '/';
413 use constant DEFAULT_REPLACE_LINE => '###$0:';
414 use constant DEFAULT_REPLACE_KEYWORD => '###$0;';
416 use constant REPLACE_LINE => '_REPLACE_LINE';
417 use constant REPLACE_KEYWORD => '_REPLACE_KEYWORD';
418 use constant PATH_SEPARATOR => '_PATH_SEPARATOR';
420 use constant ESCAPE => '_ESCAPE';
421 use constant URL_ENCODE => '_URL_ENCODE';
422 use constant HT_ENCODE => '_HT_ENCODE';
423 use constant PATH => '_PATH';
424 use constant SPRINTF => '_SPRINTF';
426 # TODO more special functions
432 my $debug_enabled = 0;
441 my $encoding_file = '';
442 my $encoding_configfile = '';
445 my $debug_out = \*STDERR;
447 $cfg{REPLACE_LINE()} = DEFAULT_REPLACE_LINE;
448 $cfg{REPLACE_KEYWORD()} = DEFAULT_REPLACE_KEYWORD;
449 $cfg{PATH_SEPARATOR()} = DEFAULT_PATH_SEPARATOR;
453 foreach my $arg (@ARGV) {
454 if ($arg =~ /^(--?)(.*)$/) { # option
456 if (($1 eq '-') and (length($arg)>1)) {
459 elsif ($arg =~ /^d(ebug)?$/) {
462 elsif ($arg =~ /^(da)|(debug[_\-]?all)$/) {
466 elsif ($arg =~ /^l(ist)?$/) {
469 elsif ($arg =~ /^(la)|(list[_\-]?all)$/) {
473 elsif ($arg =~ /^v+$/) {
474 $verbose += length($&);
476 elsif ($arg =~ /^(do)|(debug[_\-]?stdout)$/) {
477 $debug_out = \*STDOUT;
479 elsif ($arg =~ /^(de)|(debug[_\-]?stderr)$/) {
480 $debug_out = \*STDERR;
482 elsif ($arg =~ /^i(n(put)?)?$/) {
485 elsif ($arg =~ /^o(ut(put)?)?$/) {
488 elsif ($arg = /^e(ncoding)?=(.*)$/) {
491 elsif ($arg = /^(ef)|(encoding-file)=(.*)$/) {
494 elsif ($arg = /^(ecf?)|(encoding-configfile)=(.*)$/) {
495 $encoding_configfile = $3;
501 elsif ($arg =~ /^([A-Za-z0-9_\-\.]+)=(.*)$/) { # predefined setting
503 print_verbose(0, 1, "$1: $2");
506 if ($file_type eq 'i') {
507 push @input_files, $arg;
509 elsif ($file_type eq 'o') {
510 push @output_files, $arg;
513 push @config_files, $arg;
518 if ($encoding eq '') {
520 $encoding_stdin = 'console_in';
521 $encoding_stdout = 'console_out';
524 $encoding_stdin = $encoding;
525 $encoding_stdout = $encoding;
527 if ($encoding_file eq '') {
528 $encoding_file = $encoding;
530 if ($encoding_configfile eq '') {
531 $encoding_configfile = $encoding_file;
534 binmode STDIN, ":encoding($encoding_stdin)";
535 binmode STDOUT, ":encoding($encoding_stdout)";
536 binmode STDERR, ":encoding($encoding_stdout)";
538 foreach my $file (@config_files) {
539 %cfg = parse_file($file, $encoding_configfile, 0, %cfg);
542 $replace_line = make_pattern_regexp($cfg{REPLACE_LINE() });
543 $replace_keyword = make_pattern_regexp($cfg{REPLACE_KEYWORD()});
545 print_debug(0, "REPLACE_LINE REGEXP $replace_line");
546 print_debug(0, "REPLACE_KEYWORD REGEXP $replace_keyword");
548 unless ($debug_all) {
552 foreach my $key (keys %cfg) {
554 (substr($key,0,1) eq '_') or
558 print $debug_out format_cfg($key, $cfg{$key});
563 if ($list or $list_all) {
564 print $debug_out format_cfg($key, $cfg{$key});
569 if (@input_files == 0) {
570 push @input_files, '';
573 while (@input_files > 0) {
574 my $in = shift @input_files;
575 my $out = shift @output_files;
576 convert_file($in, $out, $encoding_file);
580 (my $name, my $value) = @_;
581 $value =~ s/\n/\n /g;
582 return $name.': '.$value."\n";
585 # replace WITHOUT REGEXP
587 (my $text, my $to_replace, my $replacement) = @_;
589 my $ind = index($text, $to_replace);
593 my $len = length($to_replace);
595 my $before = substr($text, 0, $ind);
596 my $after = substr($text, $ind+$len);
598 return (1, $before.$replacement.$after);
601 # # replace but with mask to avoid duplicated replacement
602 # sub replace_masked {
603 # (my $text, my $mask, my $to_replace, my $replacement) = @_;
605 # my $ind = index($mask, $to_replace);
607 # return (0, $text, $mask);
609 # my $len = length($to_replace);
611 # my $text_before = substr($text, 0, $ind);
612 # my $mask_before = substr($mask, 0, $ind);
613 # my $mask_insert = "\r" x length($replacement); # CR can never appear in $text
614 # my $text_after = substr($text, $ind+$len);
615 # my $mask_after = substr($mask, $ind+$len);
617 # print_verbose(0, 1, "$to_replace -> $replacement");
619 # $text = $text_before.$replacement.$text_after;
620 # $mask = $mask_before.$mask_insert.$mask_after;
622 # return (1, $text, $mask);
625 # do the pattern replacement
627 (my $name, my $pattern, my $fallback) = @_;
628 if (defined($cfg{$name})) {
629 print_verbose(0, 1, "$pattern -> $cfg{$name}");
637 # make the regexp for replacement patterns
638 sub make_pattern_regexp {
641 my $ind = index($pattern, '$0');
645 my $before = substr($pattern, 0, $ind);
646 my $after = substr($pattern, $ind+2);
648 $before =~ s/[\\\^\.\$\|\(\)\[\]\*\+\?\{\}\-\#]/\\$&/gs;
649 $after =~ s/[\\\^\.\$\|\(\)\[\]\*\+\?\{\}\-\#]/\\$&/gs;
651 $pattern = $before . '([A-Za-z0-9_\\-\\.]+)' . $after;
652 $pattern = qr/$pattern/;
657 (my $depth, my $text) = @_;
658 if ($debug_enabled) {
659 # $debug_text .= (' 'x$depth).$text."\n";
660 print $debug_out (' 'x$depth).$text."\n";
665 (my $depth, my $level, my $text) = @_;
666 if ($level < $verbose) {
667 print $debug_out (' 'x($depth + $level)).$text."\n";
672 (my $to_escape) = @_;
674 if ($to_escape eq 'a') { return "\a";}
675 elsif ($to_escape eq 'b') { return "\b";}
676 elsif ($to_escape eq 'e') { return "\e";}
677 elsif ($to_escape eq 'f') { return "\f";}
678 elsif ($to_escape eq 'n') { return "\n";}
679 elsif ($to_escape eq 'r') { return "\r";}
680 elsif ($to_escape eq 't') { return "\t";}
681 else { return $to_escape;}
685 (my $text, my $match) = @_;
686 unless (defined $match) {
687 $match = '[\\\\\\\'\\\"]';
693 foreach my $ch (split('', $text)) {
695 if ($ch eq "\a") { $outcome .= '\\a';}
696 elsif ($ch eq "\b") { $outcome .= '\\b';}
697 elsif ($ch eq "\e") { $outcome .= '\\e';}
698 elsif ($ch eq "\f") { $outcome .= '\\f';}
699 elsif ($ch eq "\n") { $outcome .= '\\n';}
700 elsif ($ch eq "\r") { $outcome .= '\\r';}
701 elsif ($ch eq "\t") { $outcome .= '\\t';}
702 else { $outcome .= '\\'.$ch;}
712 # TODO: this is almost duplicated from the common library
714 (my $text, my $match) = @_;
715 unless (defined $match) {
716 $match = '[^0-9A-Za-z.~\-_]'
720 foreach my $ch (split('', $text)) {
722 my $enc = encode('UTF-8', $ch);
723 foreach my $b (split('', $enc)) {
724 $outcome .= sprintf('%%%02hX',ord($b));
734 # TODO: this is almost duplicated from the common library
736 (my $text, my $match) = @_;
737 unless (defined $match) {
742 foreach my $ch (split('', $text)) {
744 $outcome .= sprintf('&#%02hu;',ord($ch));
753 # TODO: this is duplicated from the common library
755 (my $joiner, my @segments) = @_;
758 foreach my $segment (@segments) {
763 unless (substr ($path, -1) eq $joiner) {
766 if (substr ($segment, 0, 1) eq $joiner) {
777 sub get_pattern_parameters {
784 while ($text ne '') {
785 my $ch = substr($text, 0, 1);
786 my $rest = substr($text, 1);
804 elsif ($ch eq '\\') {
815 return ($text, $parameters);
818 sub parse_pattern_parameters {
819 (my $text, my $depth, my %cfg) = @_;
821 print_debug($depth, "PARSE PARAMETERS $text");
824 my $current_parameter = '';
828 while ($text ne '') {
829 my $ch = substr($text, 0, 1);
830 my $rest = substr($text, 1);
833 $current_parameter .= $ch;
838 $current_parameter .= $ch;
849 print_debug($depth, "FOUND PARAMETER $current_parameter");
850 $current_parameter = parse_value($current_parameter, $depth+1, %cfg);
851 print_debug($depth, "ADD PARAMETER ".scalar(@parameters)." $current_parameter");
852 push @parameters, $current_parameter;
853 $current_parameter = '';
856 $current_parameter .= $ch;
859 elsif ($ch eq '\\') {
860 $current_parameter .= $ch;
864 if (($level == 1) and ($ch eq ',')) {
865 print_debug($depth, "FOUND PARAMETER $current_parameter");
866 $current_parameter = parse_value($current_parameter, $depth+1, %cfg);
867 print_debug($depth, "ADD PARAMETER ".scalar(@parameters)." $current_parameter");
868 push @parameters, $current_parameter;
869 $current_parameter = '';
872 $current_parameter .= $ch;
885 (my $name, my $parameters, my $depth, my %cfg) = @_;
887 if ($depth >= MAX_DEPTH) {
888 print STDERR "Too deep.\n";
890 # return '@'.$name.$parameters;
894 print_debug($depth, "PARSE PATTERN $name: $parameters");
897 my @parameter_list = parse_pattern_parameters($parameters, $depth+1, %cfg);
900 foreach my $id (keys %cfg) {
901 unless (($id eq '') or ($id =~ /^[0-9]+$/)) {
902 $subcfg{$id} = $cfg{$id};
906 for (my $i = 0; defined $parameter_list[$i]; $i += 1) {
910 $subcfg{'_'} .= $parameter_list[$i];
911 $subcfg{$i} = $parameter_list[$i];
914 if (defined $cfg{$name}) {
915 my $to_parse = $cfg{$name};
918 while ($to_parse ne '') {
919 print_debug($depth, ":$to_parse");
920 if ($to_parse =~ /^\\(.)(.*)$/) {
921 print_debug($depth, "UNESCAPE \\$1");
922 $parsed .= unescape1ch($1);
925 elsif ($to_parse =~ /^@([A-Za-z0-9_\-\.]+)(.*)$/) {
929 ($to_parse, my $parameters)= get_pattern_parameters($to_parse);
930 print_debug($depth, "FUNCTION PATTERN $id: $parameters");
931 $parsed .= parse_pattern($id, $parameters, $depth+1, %subcfg);
933 elsif ($to_parse =~ /^\$([A-Za-z0-9_\-\.]+)(.*)$/) {
937 ($to_parse, my $parameters)= get_pattern_parameters($to_parse); # ignored anyway
938 print_debug($depth, "STATIC PATTERN $id: $parameters");
939 $parsed .= get_pattern($id, $depth+1, %subcfg);
941 elsif ($to_parse =~ /^([^\$\\@]+)(([\$\\@].*)?)$/) {
942 print_debug($depth, "TEXT $1");
947 my $ch = substr($to_parse, 0, 1);
948 print_debug($depth, "TEXT $ch");
950 $to_parse = substr($to_parse, 1);
955 # elsif ($name =~ /^[0-9+]$/) {
958 elsif ($name eq ESCAPE) {
959 $return = escape(@parameter_list);
961 elsif ($name eq URL_ENCODE) {
962 $return = urlencode(@parameter_list);
964 elsif ($name eq HT_ENCODE) {
965 $return = entityencode(@parameter_list);
967 elsif ($name eq PATH) {
968 $return = join_path($cfg{PATH_SEPARATOR()},@parameter_list);
970 elsif ($name eq SPRINTF) {
971 my $format = shift @parameter_list;
972 $return = sprintf($format,@parameter_list);
975 # return '@'.$name.$parameters;
978 print_debug($depth, "PARSED! PATTERN $return");
983 (my $name, my $depth, my %cfg) = @_;
986 if (defined $cfg{$name}) {
990 # $value = '$'.$name;
993 print_debug($depth, "GET PATTERN $name: $value");
998 (my $to_parse, my $depth, my %cfg) = @_;
1000 if ($depth >= MAX_DEPTH) {
1001 print STDERR "Too deep.\n";
1008 if ($to_parse =~ /^((([^#\\])|(\\.))*)#/) {
1011 if ($to_parse =~ /^[ \t]*(([^ \t](.*[^ \t])?)?)[ \t]*$/) {
1015 print_debug($depth, "PARSE VALUE $to_parse");
1017 while ($to_parse ne '') {
1018 print_debug($depth, ":$to_parse");
1020 if ($to_parse =~ /^\\(.)(.*)$/) {
1021 print_debug($depth, "UNESCAPE \\$1");
1022 $parsed .= unescape1ch($1);
1025 elsif ($to_parse =~ /^@([A-Za-z0-9_\-\.]+)(.*)$/) {
1029 ($to_parse, my $parameters)= get_pattern_parameters($to_parse);
1030 print_debug($depth, "FUNCTION PATTERN $name: $parameters");
1031 $parsed .= parse_pattern($name, $parameters, $depth+1, %cfg);
1033 elsif ($to_parse =~ /^\$([A-Za-z0-9_\-\.]+)(.*)$/) {
1037 ($to_parse, my $parameters)= get_pattern_parameters($to_parse); # ignored anyway
1038 print_debug($depth, "STATIC PATTERN $name: $parameters");
1039 $parsed .= get_pattern($name, $depth+1, %cfg)
1041 elsif ($to_parse =~ /^([^\$\\@]+)(([\$\\@].*)?)$/) {
1042 print_debug($depth, "TEXT $1");
1047 my $ch = substr($to_parse, 0, 1);
1048 print_debug($depth, "TEXT $ch");
1050 $to_parse = substr($to_parse, 1);
1053 print_debug($depth, "PARSED! VALUE $parsed");
1058 (my $path, my $encoding, my $depth, my %cfg) = @_;
1060 print_verbose($depth, 0, "$path");
1062 if ($depth >= MAX_DEPTH) {
1063 print STDERR "Too deep.\n";
1067 print_debug($depth, "PARSE FILE $path");
1070 unless (open $file, "<:encoding($encoding)", encode('locale_fs', $path)) {
1071 print STDERR "Cannot open configfile $path.\n";
1081 while (defined(my $line = <$file>)) {
1082 $line =~ s/[\r\n]//g;
1084 print_debug($depth, "LINE $line");
1086 # new definition name: value
1087 if ($line =~ /^([A-Za-z0-9_\-\.]+)[ \t]*:[ \t](.*)$/) {
1089 unless ($if_block) {
1093 print_debug($depth, "STATIC DEFINE $name=$value")
1096 # new definition name = value
1097 elsif ($line =~ /^([A-Za-z0-9_\-\.]+)[ \t]*=(.*)$/) {
1099 unless ($if_block) {
1103 print_debug($depth, "DYNAMIC DEFINE $name=$value")
1106 # continued definition
1107 elsif ($line =~ /^[ \t](.*)$/) {
1108 unless ($if_block) {
1110 print_debug($depth, "CONTINUE $name $value")
1114 elsif ($line =~ /^include[ \t]+([^ \t](.*[^ \t])?)[ \t]*$/) {
1115 unless ($if_block) {
1116 my $path = parse_value($1, $depth+1, %cfg);
1117 print_debug($depth, "INCLUDE $path");
1118 %cfg = parse_file($path, $encoding, $depth+1, %cfg);
1124 elsif ($line =~ /^if[ \t]+([^ \t](.*[^ \t])?)[ \t]*$/) {
1126 unless ($if_block) {
1127 my $cond = parse_value($1, $depth+1, %cfg);
1128 print_debug($depth, "IF.$if_depth $cond");
1129 unless ($cond ne '') {
1130 $if_block = $if_depth;
1137 elsif ($line =~ /^endif([ \t].*)?$/) {
1138 my $_if_depth = $if_depth;
1139 if ($if_depth > 0) {
1142 if ($if_depth < $if_block) {
1145 unless ($if_block) {
1146 print_debug($depth, "ENDIF.$_if_depth");
1152 elsif ($line =~ /^else([ \t].*)?$/) {
1153 unless ($if_block) {
1154 $if_block = $if_depth;
1155 print_debug($depth, "ELSE.$if_depth");
1157 elsif ($if_depth == $if_block) {
1159 print_debug($depth, "ELSE.$if_depth");
1164 # no el(s)if at this point
1166 # other line, not understood
1172 if (($name ne '') and (not $if_block)) {
1173 if ($parse_mode != 0) {
1174 $value = parse_value($value, $depth+1, %cfg);
1176 $cfg{$name} .= $value;
1177 print_verbose($depth, 1, "$name: $value");
1178 print_debug($depth, "ADD $name=$value")
1187 (my $in, my $out, my $encoding) = @_;
1189 print_verbose(0, 0, "$in -> $out");
1191 my $ref_in = ref($in);
1192 my $ref_out = ref($out);
1206 unless (open $in, "<:encoding($encoding)", encode('locale_fs', $path)) {
1207 print STDERR "Cannot open input file $path.\n";
1214 unless (open $out, ">:encoding($encoding)", encode('locale_fs', $path)) {
1215 print STDERR "Cannot open output file $path.\n";
1223 # TODO: This is extremely inefficient. and does not scale well. Improve!
1225 LINE: while (defined(my $line = <$in>)) {
1227 while ($line =~ /$replace_line/gs) {
1228 my $new_line = apply_pattern($1, $&, undef);
1229 if (defined($new_line)) {
1230 print $out "$new_line\n";
1234 $line =~ s/[\r\n]//g;
1235 $line =~ s/$replace_keyword/apply_pattern($1,$&,$&)/gse;
1237 print $out "$line\n";