bicyclesonthemoon.info Git - botm/config/blob - configure.1.pl

   1 ###RUN_PERL: #!/usr/bin/perl
   2
   3 # configure.pl
   4 # The new BOTM configuration tool
   5
   6 #    Copyright (C) 2022, 2023, 2024 Balthasar Szczepański
   7 #
   8 #    This program is free software: you can redistribute it and/or modify
   9 #    it under the terms of the GNU Affero General Public License as
  10 #    published by the Free Software Foundation, either version 3 of the
  11 #    License, or (at your option) any later version.
  12 #
  13 #    This program is distributed in the hope that it will be useful,
  14 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 #    GNU Affero General Public License for more details.
  17 #
  18 #    You should have received a copy of the GNU Affero General Public License
  19 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  20
  21 # This is a tool for managing configuration in a project.
  22 # It was initially based on my configuration script from the BSTA software
  23 # but then I have rewriten it from beginning with new design goals.
  24 #
  25 # The goals:
  26 #  - tool to read confinguration files and then apply changes in
  27 #    project's source files
  28 #  - configure the project at compile time, easy cooperation with makefile
  29 #  - possible to keep separately:
  30 #     - the actual important settins
  31 #     - the settings derived from them
  32 #     - the configuration tool
  33 #     - the files to configure
  34 #
  35 # Why:
  36 #
  37 # 1. You write a program. You are silently making assumptions. Whenever
  38 #    you need a path or other dependency, you define ("hard-code") it there.
  39 #    Acceptable for a simple, short, single-use program maybe.
  40 #    But terrible for anything more serious. Maintenance and portability
  41 #    becomes difficult. When you want to change anything you have to look
  42 #    through the whole code to find all places where something is defined.
  43 #    A lot of effort and easy to make a mistake.
  44 #
  45 # 2. Instead of hard-coding everything everywhere you can define the assumed
  46 #    settings at the begnning of each file using 'const' or '#define' or
  47 #    whatever the programming language allows. Even better if you can put the
  48 #    definitions into a single file and include them where needed.
  49 #    Much better but still for some big projects with many files it takes some
  50 #    time and effort to go through all of them. Even if all you wanted was
  51 #    to adapt the project to run on a different system for example.
  52 #
  53 # 3. Ok, you could have all definitions in one place and then apply them
  54 #    to all the files. One way to do it is to make a script which will read
  55 #    the settings from a file and then insert '#define' (C), 'use constant'
  56 #    (Perl), or other statements into the files based on pattern matching.
  57 #
  58 #    This is for configuration done at compile time. Not at run time.
  59 #    There the program itself has to read the settings and make decisions.
  60 #
  61 #    There also exist things like autoconf, automake, etc. see:
  62 #    http://www.mrob.com/pub/comp/unix-building-history.html
  63 #    However for many project I'm currently dealing with, this script
  64 #    here is totally enough and so far I did not have to learn these tools.
  65 #    They are still a black box (or even black magic) to me.
  66 #
  67 # Usage:
  68 #
  69 # perl configure.pl configfile1 configfile2 ... < input_file > output_file
  70 #
  71 # perl configure.pl --option1 ... name=value ... configfile1 configfile2 ... --input inputfile1 inputfile2 ... --output outputfile1 outputfile2
  72 #
  73 # Or './configure.pl' instead of 'perl configure.pl'.
  74 #
  75 # There are 3 types of commandline parameters:
  76 # * in form 'name=value' where 'name' is a valid setting name (described
  77 #   further below): this setting will be loaded from commandline directly
  78 #   (and literally - no processing) before reading any configuration files
  79 # * starting from '--': these are options:
  80 #   * --d, --debug - will print debug information to standard error
  81 #   * --da, --debugall, --debug_all - as above but will also include debug
  82 #     information produced after finished processing the config files
  83 #     (at this moment equivalent to --debug but left for compatibility)
  84 #   * --l, --list - will print the final list of all settings to standard
  85 #     error
  86 #   * --la, --listall, --list_all - as above but will include settings not
  87 #     available for applying in files (starting with '_')
  88 #   * --v, --vv - will print information about processed files and patterns
  89 #     to standard error. Number of 'v's is the verbose level.
  90 #   * --do, --debug_stdout - all debug information will go to standard output
  91 #   * --de, --debug_stderr - all debug information will go to standard error
  92 #   * --i, --in, --input - all further file paths are for input files
  93 #   * --o, --out, --output - all further file paths are for output files
  94 #   * -- - all further file paths are for configuration files
  95 #   * --e=, --encoding= - sets the character encoding for input, output,
  96 #     and files
  97 #   * --ef=, --encoding-file= - sets character encoding for files only,
  98 #     overrides --e
  99 #   * --ecf, --ecnoding-configfile= - sets character encoding for configuration
 100 #     files only, overrides --ef
 101 # * anything else will be treated as a path to a file: input, output,
 102 #   or configuration. By default configuration files are expected.
 103 #
 104 # Unless defined otherwise, the character encoding for all files is UTF-8,
 105 # and for standard input/output it's the encoding determined from locale.
 106 # for parsing command line arguments and for opening files always
 107 # system locale is used, this can not be overridded.
 108 #
 109 # The script reads configuration from all configuration files given in the
 110 # command line parameters. The effect is similar to reading a single
 111 # concatenated file.
 112 #
 113 # Configuration files are read once and in the same order as the command line
 114 # parameters.
 115 #
 116 # Settings provided directly in command line are read before the files. So the
 117 # files can override such settings.
 118 #
 119 # Next, input files are read, converted, and saved to output files.
 120 #
 121 # The input and output files are procesed in the same order as the command line
 122 # parameters. First input file is converted to first output file and so on.
 123 # If there are more output files than input files, then the extra output
 124 # files will not be written.
 125 # If there are less output files than input files, then instead of the missing
 126 # files the standard output is used.
 127 # If there are no input files, then instead the standard input is used (as if
 128 # it was 1 file)
 129 #
 130 # It reads input line by line.
 131 # In each line it looks for replacement patterns, replaces them if found,
 132 # and prints the line to the output.
 133 #
 134 # Whenever I write 'whitespace' I mean it can be space ' ' or TAB '\t'.
 135 #
 136 # It is possible to include files like this:
 137 #   include path_to_include
 138 # The include statement must start from the first column in the line.
 139 # It is equivalent to replacing the line (with the include statement)
 140 # by the content of the included file
 141 #
 142 # settings can be defined in two ways:
 143 #   name: value
 144 #   name = value
 145 # These must start from the first column in the line.
 146 # If a line starts with a whitespace it is treated as a continuation
 147 # of the value:
 148 #   name: line1
 149 #    line2
 150 #   name = line1
 151 #          line2
 152 # The name consists of the characters A-Z, a-z, 0-9, '-', '_', and '.'.
 153 #
 154 # In the first format (name: value):
 155 #   - Any amount (including 0) of whitespace is allowed before the ':'.
 156 #   - The value starts immediately after the first whitespace after the ':'
 157 #     and is taken directly without any processing.
 158 #
 159 # In the second format (name = value) some processing is performed:
 160 #   - Any amount (including 0) of whitespace is allowed before and
 161 #     after the '='.
 162 #   - Comments starting from '#' are removed.
 163 #   - whitespace at beginning and end of each line are removed
 164 #   - escaped characters (by '\') are processed.
 165 #   - other settings inserted by $name are expanded.
 166 #
 167 # It is possible to insert one setting's value into another one like this:
 168 #   s1: 456
 169 #   s2 = 123 $s1 789
 170 # In this example the value of s2 is '123 456 789'.
 171 # This will not work:
 172 #   s2 = 123 $s1 789
 173 #   s1: 456
 174 # When processing s2 $s1 is not defined yet so the value of s2 is:
 175 # '123   789'.
 176 #
 177 # It is possible to use a value as a function pattern:
 178 #   s1: $0 = "$1"
 179 #   s2 = @s1(a,b)
 180 # In this example the value of s2 is: 'a = "b"'.
 181 # Here names made entirely from digits are used as the parameters of the
 182 # function pattern and should not be used for other purposes. Possible but
 183 # not recommended. another special name is $_, which joins all parameters
 184 # by ','.
 185 #
 186 # If not provided, the parameters will be treated as empty string.
 187 # So here:
 188 #   s1: $0 = "$1"
 189 #   s2 = @s1(a)
 190 # the value of s2 is: 'a = ""'.
 191 #
 192 # Even though $0 and $1 were not processed when s1 was defined, they
 193 # were processed when s1 was used as a function pattern.
 194 #
 195 # In this example:
 196 #   s1 = $0 = "$1"
 197 #   s2 = @s1(a,b)
 198 # s1 will be immediately evaluated to ' = ""', and s2 will also become
 199 # ' = ""'.
 200 #
 201 # Escaping will protect $0 and $2 to be processed too early:
 202 #   s1 = \$0 = "\$1"
 203 #   s2 = @s1(a,b)
 204 # Here the value of s1 is '$0 = "$1"' again.
 205 #
 206 # The pattern function calls can be nested:
 207 #   s1: >$0<
 208 #   s2: <$0>
 209 #   s3 = @s2(@s1(1))
 210 # In this example this is how it will be processed:
 211 #   processing s3, '@s2(@s1(1))'
 212 #   found call to s2 with parameters '(@s1(1))'
 213 #     processing s2, '<$0>', '(@s1(1))'
 214 #       processing parameters '(@s1(1))'
 215 #       found parameter '@s1(1)'
 216 #         processing '@s1(1)'
 217 #         found call to s1 with parameters '(1)'
 218 #           processing s1, '>$0<', '(1)'
 219 #             processing parameters '(1)'
 220 #             found parameter '1'
 221 #               processing '1'
 222 #               return '1'
 223 #             value is now '1'
 224 #             return ('1')
 225 #           found call to $0 with no parameters
 226 #           replace $0 with '1'
 227 #           value is now '>1<'
 228 #           return '>1<'
 229 #         replace $s1() with '>1<'
 230 #         value is now '>1<'
 231 #         return '>1<'
 232 #       value is now '>1<'
 233 #       return ('>1<')
 234 #     found call to $0 with no parameters
 235 #     replace $0 with '>1<'
 236 #     value is now '<>1<>'
 237 #     return '<>1<>'
 238 #   replace $2() with '<>1<>'
 239 #   value is now '<>1<>'
 240 #   return '<>1<>'
 241 #
 242 # So when processing a value, if a pattern function call is found,
 243 # then first, the parameters are separated, and each is processed one
 244 # recursion level deeper.
 245 # then the pattern is processed (also one recursion level deeper than the
 246 # base value) and the $0, etc. are replaced by the (already processed)
 247 # parameters.
 248 #
 249 # The parameter list is used for '@name' and '$name' even if '$' doesn't
 250 # take any parameters.
 251 # Empty parameter list can be useful for separating from text afterwards:
 252 #   s1: 456
 253 #   s2 = 123$s1()789
 254 # Here the value of s2 is: '123456789'.
 255 # Without the '()':
 256 #       s1: 456
 257 #   s2 = 123$s1789
 258 #  Here the value of s2 is: '123' because $s1789 was not found.
 259 #
 260 # Escaping the '(' prevents interpreting it as some parameters:
 261 #   s1: 456
 262 #   s2 = (123)$s1\(789)
 263 # Here the value of s2 is: '(123)456(789)'.
 264 # Without the '\(':
 265 #   s1: 456
 266 #   s2 = (123)$s1(789)
 267 # Here the value of s2 is: '(123)456' because '789' was given to s1
 268 # as $0 and ignored.
 269 #
 270 # Function patterns can also be given as parameters.
 271 # Example:
 272 #   s1: @0($1,$2)
 273 #   s2: ($0)_($1)
 274 #   s3 = s1($s2, a, b)
 275 # Here the value of s3 is '(a)_(b)'.
 276 #
 277 # There are special functions and values:
 278 #   @_ESCAPE is a function which will escape some characters with '\'.
 279 #     $0 - the text to be escaped
 280 #     $1 - the regexp which specifies which characters should be escaped
 281 #          if not specified then a default regexp is used
 282 #   @_URL_ENCODE is a function which will perform percent-encoding
 283 #     $0 - the text to be encoded
 284 #     $1 - the regexp which specifies which characters should be encoded
 285 #          if not specified then a default regexp is used
 286 #   @_HT_ENCODE is a function which will perform HTML entity encoding
 287 #     $0 - the text to be encoded
 288 #     $1 - the regexp which specifies which characters should be encoded
 289 #          if not specified then a default regexp is used
 290 #   @_SPRINTF is a call to the Perl sprintf() function
 291 #     $0 - the format string
 292 #     $1 and continuing - the parameters to sprintf()
 293 #   @_PATH is a function which will join a path from segments
 294 #     $0 and continuing - the segments to be joined
 295 #   $_PATH_SEPATATOR is the path separator used by $_PATH.
 296 #     can be overwritten, default value '/'.
 297 #   $_REPLACE_LINE is the pattern to look for in files, to replace the
 298 #     entire line. Can be overwritten. Default value '###$0:'
 299 #     $0 - the name of the setting
 300 #   $_REPLACE_KEYWORD is the pattern to look for in files, to replace
 301 #     just the pattern. Can be overwritten. Default value '###$0;'
 302 #     $0 - the name of the setting
 303 #   More functions and values can be added in future versions.
 304 # $_REPLACE_LINE and $_REPLACE_KEYWORD are NOT evaluated the same way
 305 # as normal patterns. Instead they are converted to regexp patterns where
 306 # $0 is replaced with the keyword regexp.
 307 #
 308 # The settings can be conditional thanks to using statements like:
 309 #    if
 310 #    else
 311 #    endif
 312 #
 313 # Anything after the "if" keyword is the codition to check.
 314 # Condition is considered true if its value is not an empty string
 315 # Condition is parsed the same way as a dynamic setting definition
 316 #
 317 # Conditional statements must start at from first column in the line.
 318 # Conditional statements can be nested
 319 #
 320 # Example:
 321 #   s1: 1
 322 #   if $s1
 323 #   s3 = $s1
 324 #   if $s2
 325 #   s3 = $s2
 326 #   endif
 327 #   else
 328 #   s3: 0
 329 #   endif
 330 # Here the value of s3 is '1'.
 331 #
 332 #
 333 # Inserting settings to source files:
 334 #
 335 # After all settings are read, the actual source is processed line by line.
 336 # In each line the tool will look for patterns to replace the entire line.
 337 # The patterns are built by inserting settings names to $REPLACE_LINE.
 338 # For example if:
 339 #   REPLACE_LINE: ###$0:
 340 #   s1: ABC
 341 # then each line containing '###s1:' will be replaced by 'ABC', and so on.
 342 # The patterns are checked in unpredictable order (because Perl hash).
 343 # If any match is found, the line is replaced and the tool moves to next line.
 344 # If none of the line patterns are matched then comes next step:
 345 # The tool will now look for patterns to replace in the line.
 346 # The patterns are built by inserting settings names to $REPLACE_KEYWORD.
 347 # For example if:
 348 #   REPLACE_KEYWORD: ###$0;
 349 #   s3: XYZ
 350 # then each occurence of '###s3:' will be replaced by 'XYZ', and so on.
 351 # The patterns are checked in unpredictable order (because Perl hash)
 352 #
 353 # Use in project:
 354 #
 355 # Either install the tool somewhere on the system and multiple projects
 356 # can call it, or copy the tool as part of the project
 357 #
 358 # See the makefile of the tool's project as a simple example.
 359 #
 360 # Of course if you found this tool as part of a different project then
 361 # you probably will not see the original makefile.
 362 # Instead maybe check how the tool is used in the project.
 363 #
 364 # Some ideas:
 365 #
 366 # It's a good idea to use the tool together with the makefile, to compile
 367 # the project.
 368 #
 369 # Generate source files and then use them:
 370 #
 371 #   abc: abc.c
 372 #       gcc -o abc abc.c
 373 #
 374 #   abc.c: abc.1.c settings.txt configure.pl
 375 #       perl configure.pl settings.txt < abd.1.c > abc.c
 376 #
 377 # Of course not only the source files but also the makefile itself can be
 378 # configured:
 379 #
 380 #   makefile: makefile.1.mak settings.txt configure.pl
 381 #       perl configure.pl settings.txt < makefile.1.mak > makefile
 382 #
 383 # If the settings have changed then any attept to make something will
 384 # first regenerate the makefile and then use it.
 385 #
 386 # Use different settings for different targets:
 387 #
 388 #   ifndef TARGET
 389 #   TARGET = debug
 390 #   endif
 391 #
 392 #   SETTINGS = settings-$(TARGET).txt settings.txt
 393 #
 394 #   makefile: makefile.1.mak $(SETTINGS) configure.pl
 395 #       perl configure.pl $(SETTINGS) < makefile.1.mak > makefile
 396 #
 397 # Then use:
 398 #   make TARGET=targetname something_to_make
 399 #
 400 # Or even replace the default target by a configurable pattern.
 401 # then, to set a new target use:
 402 #   make TARGET=targetname -B makefile
 403 # And afterwards the new tatget will be the default one.
 404
 405 use strict;
 406 use utf8;
 407 use Encode::Locale ('decode_argv');
 408 use Encode ('encode', 'decode');
 409
 410 use constant MAX_DEPTH               => 256;
 411 use constant KEYWORD_PATTERN         => '[A-Za-z0-9_\-\.]+';
 412 use constant DEFAULT_PATH_SEPARATOR  => '/';
 413 use constant DEFAULT_REPLACE_LINE    => '###$0:';
 414 use constant DEFAULT_REPLACE_KEYWORD => '###$0;';
 415
 416 use constant REPLACE_LINE    => '_REPLACE_LINE';
 417 use constant REPLACE_KEYWORD => '_REPLACE_KEYWORD';
 418 use constant PATH_SEPARATOR  => '_PATH_SEPARATOR';
 419
 420 use constant ESCAPE     => '_ESCAPE';
 421 use constant URL_ENCODE => '_URL_ENCODE';
 422 use constant HT_ENCODE  => '_HT_ENCODE';
 423 use constant PATH       => '_PATH';
 424 use constant SPRINTF    => '_SPRINTF';
 425
 426 # TODO more special functions
 427
 428 my %cfg;
 429 my @config_files;
 430 my @input_files;
 431 my @output_files;
 432 my $debug_enabled = 0;
 433 my $debug_all = 0;
 434 my $list = 0;
 435 my $list_all = 0;
 436 my $file_type = '';
 437 my $verbose = 0;
 438 my $replace_line;
 439 my $replace_keyword;
 440 my $encoding = '';
 441 my $encoding_file = '';
 442 my $encoding_configfile = '';
 443 my $encoding_stdin;
 444 my $encoding_stdout;
 445 my $debug_out = \*STDERR;
 446
 447 $cfg{REPLACE_LINE()}    = DEFAULT_REPLACE_LINE;
 448 $cfg{REPLACE_KEYWORD()} = DEFAULT_REPLACE_KEYWORD;
 449 $cfg{PATH_SEPARATOR()}  = DEFAULT_PATH_SEPARATOR;
 450
 451 decode_argv();
 452
 453 foreach my $arg (@ARGV) {
 454         if ($arg =~ /^(--?)(.*)$/) { # option
 455                 $arg = $2;
 456                 if (($1 eq '-') and (length($arg)>1)) {
 457                         # invalid for now
 458                 }
 459                 elsif ($arg =~ /^d(ebug)?$/) {
 460                         $debug_enabled = 1;
 461                 }
 462                 elsif ($arg =~ /^(da)|(debug[_\-]?all)$/) {
 463                         $debug_enabled = 1;
 464                         $debug_all = 1;
 465                 }
 466                 elsif ($arg =~ /^l(ist)?$/) {
 467                         $list = 1;
 468                 }
 469                 elsif ($arg =~ /^(la)|(list[_\-]?all)$/) {
 470                         $list = 1;
 471                         $list_all = 1;
 472                 }
 473                 elsif ($arg =~ /^v+$/) {
 474                         $verbose += length($&);
 475                 }
 476                 elsif ($arg =~ /^(do)|(debug[_\-]?stdout)$/) {
 477                         $debug_out = \*STDOUT;
 478                 }
 479                 elsif ($arg =~ /^(de)|(debug[_\-]?stderr)$/) {
 480                         $debug_out = \*STDERR;
 481                 }
 482                 elsif ($arg =~ /^i(n(put)?)?$/) {
 483                         $file_type = 'i';
 484                 }
 485                 elsif ($arg =~ /^o(ut(put)?)?$/) {
 486                         $file_type = 'o';
 487                 }
 488                 elsif ($arg = /^e(ncoding)?=(.*)$/) {
 489                         $encoding = $2;
 490                 }
 491                 elsif ($arg = /^(ef)|(encoding-file)=(.*)$/) {
 492                         $encoding_file = $3;
 493                 }
 494                 elsif ($arg = /^(ecf?)|(encoding-configfile)=(.*)$/) {
 495                         $encoding_configfile = $3;
 496                 }
 497                 elsif ($arg eq '') {
 498                         $file_type = '';
 499                 }
 500         }
 501         elsif ($arg =~ /^([A-Za-z0-9_\-\.]+)=(.*)$/) { # predefined setting
 502                 $cfg{$1} = $2;
 503                 print_verbose(0, 1, "$1: $2");
 504         }
 505         else { # file
 506                 if ($file_type eq 'i') {
 507                         push @input_files, $arg;
 508                 }
 509                 elsif ($file_type eq 'o') {
 510                         push @output_files, $arg;
 511                 }
 512                 else {
 513                         push @config_files, $arg;
 514                 }
 515         }
 516 }
 517
 518 if ($encoding eq '') {
 519         $encoding        = 'UTF-8';
 520         $encoding_stdin  = 'console_in';
 521         $encoding_stdout = 'console_out';
 522 }
 523 else {
 524         $encoding_stdin  = $encoding;
 525         $encoding_stdout = $encoding;
 526 }
 527 if ($encoding_file eq '') {
 528         $encoding_file = $encoding;
 529 }
 530 if ($encoding_configfile eq '') {
 531         $encoding_configfile = $encoding_file;
 532 }
 533
 534 binmode STDIN,  ":encoding($encoding_stdin)";
 535 binmode STDOUT, ":encoding($encoding_stdout)";
 536 binmode STDERR, ":encoding($encoding_stdout)";
 537
 538 foreach my $file (@config_files) {
 539         %cfg = parse_file($file, $encoding_configfile, 0, %cfg);
 540 }
 541
 542 $replace_line    = make_pattern_regexp($cfg{REPLACE_LINE()   });
 543 $replace_keyword = make_pattern_regexp($cfg{REPLACE_KEYWORD()});
 544
 545 print_debug(0, "REPLACE_LINE REGEXP $replace_line");
 546 print_debug(0, "REPLACE_KEYWORD REGEXP $replace_keyword");
 547
 548 unless ($debug_all) {
 549         $debug_enabled = 0;
 550 }
 551
 552 foreach my $key (keys %cfg) {
 553         if (
 554                 (substr($key,0,1) eq '_') or
 555                 ($key =~ /^[0-9]+$/)
 556         ) {
 557                 if ($list_all) {
 558                         print $debug_out format_cfg($key, $cfg{$key});
 559                 }
 560                 delete %cfg{$key};
 561                 next;
 562         }
 563         if ($list or $list_all) {
 564                 print $debug_out format_cfg($key, $cfg{$key});
 565         }
 566 }
 567
 568
 569 if (@input_files == 0) {
 570         push @input_files, '';
 571 }
 572
 573 while (@input_files > 0) {
 574         my $in = shift @input_files;
 575         my $out = shift @output_files;
 576         convert_file($in, $out, $encoding_file);
 577 }
 578
 579 sub format_cfg {
 580         (my $name, my $value) = @_;
 581         $value =~ s/\n/\n /g;
 582         return $name.': '.$value."\n";
 583 }
 584
 585 # replace WITHOUT REGEXP
 586 sub replace {
 587         (my $text, my $to_replace, my $replacement) = @_;
 588
 589         my $ind = index($text, $to_replace);
 590         if ($ind <0) {
 591                 return (0, $text);
 592         }
 593         my $len = length($to_replace);
 594
 595         my $before = substr($text, 0, $ind);
 596         my $after = substr($text, $ind+$len);
 597
 598         return (1, $before.$replacement.$after);
 599 }
 600
 601 # # replace but with mask to avoid duplicated replacement
 602 # sub replace_masked {
 603         # (my $text, my $mask, my $to_replace, my $replacement) = @_;
 604
 605         # my $ind = index($mask, $to_replace);
 606         # if ($ind <0) {
 607                 # return (0, $text, $mask);
 608         # }
 609         # my $len = length($to_replace);
 610
 611         # my $text_before = substr($text, 0, $ind);
 612         # my $mask_before = substr($mask, 0, $ind);
 613         # my $mask_insert = "\r" x length($replacement); # CR can never appear in $text
 614         # my $text_after = substr($text, $ind+$len);
 615         # my $mask_after = substr($mask, $ind+$len);
 616
 617         # print_verbose(0, 1, "$to_replace -> $replacement");
 618
 619         # $text = $text_before.$replacement.$text_after;
 620         # $mask = $mask_before.$mask_insert.$mask_after;
 621
 622         # return (1, $text, $mask);
 623 # }
 624
 625 # do the pattern replacement
 626 sub apply_pattern {
 627         (my $name, my $pattern, my $fallback) = @_;
 628         if (defined($cfg{$name})) {
 629                 print_verbose(0, 1, "$pattern -> $cfg{$name}");
 630                 return $cfg{$name};
 631         }
 632         else {
 633                 return $fallback;
 634         }
 635 }
 636
 637 # make the regexp for replacement patterns
 638 sub make_pattern_regexp {
 639         (my $pattern) = @_;
 640
 641         my $ind = index($pattern, '$0');
 642         if ($ind <0) {
 643                 return '';
 644         }
 645         my $before = substr($pattern, 0, $ind);
 646         my $after  = substr($pattern, $ind+2);
 647
 648         $before =~ s/[\\\^\.\$\|\(\)\[\]\*\+\?\{\}\-\#]/\\$&/gs;
 649         $after =~ s/[\\\^\.\$\|\(\)\[\]\*\+\?\{\}\-\#]/\\$&/gs;
 650
 651         $pattern = $before . '([A-Za-z0-9_\\-\\.]+)' . $after;
 652         $pattern = qr/$pattern/;
 653         return $pattern;
 654 }
 655
 656 sub print_debug {
 657         (my $depth, my $text) = @_;
 658         if ($debug_enabled) {
 659                 # $debug_text .= ('  'x$depth).$text."\n";
 660                 print $debug_out ('  'x$depth).$text."\n";
 661         }
 662 }
 663
 664 sub print_verbose {
 665         (my $depth, my $level, my $text) = @_;
 666         if ($level < $verbose) {
 667                 print $debug_out ('  'x($depth + $level)).$text."\n";
 668         }
 669 }
 670
 671 sub unescape1ch {
 672         (my $to_escape) = @_;
 673
 674         if    ($to_escape eq 'a') { return "\a";}
 675         elsif ($to_escape eq 'b') { return "\b";}
 676         elsif ($to_escape eq 'e') { return "\e";}
 677         elsif ($to_escape eq 'f') { return "\f";}
 678         elsif ($to_escape eq 'n') { return "\n";}
 679         elsif ($to_escape eq 'r') { return "\r";}
 680         elsif ($to_escape eq 't') { return "\t";}
 681         else                      { return $to_escape;}
 682 }
 683
 684 sub escape {
 685         (my $text, my $match) = @_;
 686         unless (defined $match) {
 687                 $match = '[\\\\\\\'\\\"]';
 688                 #         [ \ \ \ ' \ "]
 689                 #         [   \   '   "]
 690         }
 691
 692         my $outcome = '';
 693         foreach my $ch (split('', $text)) {
 694                 if ($ch =~ $match) {
 695                         if    ($ch eq "\a") { $outcome .= '\\a';}
 696                         elsif ($ch eq "\b") { $outcome .= '\\b';}
 697                         elsif ($ch eq "\e") { $outcome .= '\\e';}
 698                         elsif ($ch eq "\f") { $outcome .= '\\f';}
 699                         elsif ($ch eq "\n") { $outcome .= '\\n';}
 700                         elsif ($ch eq "\r") { $outcome .= '\\r';}
 701                         elsif ($ch eq "\t") { $outcome .= '\\t';}
 702                         else                { $outcome .= '\\'.$ch;}
 703                 }
 704                 else {
 705                         $outcome .= $ch;
 706                 }
 707         }
 708         return $outcome;
 709 }
 710
 711 # UTF-8 only
 712 # TODO: this is almost duplicated from the common library
 713 sub urlencode {
 714         (my $text, my $match) = @_;
 715         unless (defined $match) {
 716                 $match = '[^0-9A-Za-z.~\-_]'
 717         }
 718
 719         my $outcome = '';
 720         foreach my $ch (split('', $text)) {
 721                 if ($ch =~ $match) {
 722                         my $enc = encode('UTF-8', $ch);
 723                         foreach my $b (split('', $enc)) {
 724                                 $outcome .= sprintf('%%%02hX',ord($b));
 725                         }
 726                 }
 727                 else {
 728                         $outcome .= $ch;
 729                 }
 730         }
 731         return $outcome;
 732 }
 733
 734 # TODO: this is almost duplicated from the common library
 735 sub entityencode {
 736         (my $text, my $match) = @_;
 737         unless (defined $match) {
 738                 $match = '[\"=><\&]'
 739         }
 740
 741         my $outcome = '';
 742         foreach my $ch (split('', $text)) {
 743                 if ($ch =~ $match) {
 744                         $outcome .= sprintf('&#%02hu;',ord($ch));
 745                 }
 746                 else {
 747                         $outcome .= $ch;
 748                 }
 749         }
 750         return $outcome;
 751 }
 752
 753 # TODO: this is duplicated from the common library
 754 sub join_path {
 755         (my $joiner, my @segments) = @_;
 756
 757         my $path = '';
 758         foreach my $segment (@segments) {
 759                 if ($path eq '') {
 760                         $path = $segment;
 761                 }
 762                 else {
 763                         unless (substr ($path, -1) eq $joiner) {
 764                                 $path .= $joiner;
 765                         }
 766                         if (substr ($segment, 0, 1) eq $joiner) {
 767                                 $path = $segment;
 768                         }
 769                         else {
 770                                 $path .= $segment;
 771                         }
 772                 }
 773         }
 774         return $path;
 775 }
 776
 777 sub get_pattern_parameters {
 778         (my $text) = @_;
 779         my $parameters;
 780
 781         my $level = 0;
 782         my $esc = 0;
 783
 784         while ($text ne '') {
 785                 my $ch = substr($text, 0, 1);
 786                 my $rest = substr($text, 1);
 787
 788                 if ($esc) {
 789                         $parameters .= $ch;
 790                         $esc = 0;
 791                 }
 792                 elsif ($ch eq '(') {
 793                         $parameters .= $ch;
 794                         $level += 1;
 795                 }
 796                 else {
 797                         if ($level == 0) {
 798                                 last;
 799                         }
 800                         $parameters .= $ch;
 801                         if ($ch eq ')') {
 802                                 $level -= 1;
 803                         }
 804                         elsif ($ch eq '\\') {
 805                                 $esc = 1;
 806                         }
 807                         else {
 808                         }
 809                 }
 810                 $text = $rest;
 811                 if ($level == 0) {
 812                         last;
 813                 }
 814         }
 815         return ($text, $parameters);
 816 }
 817
 818 sub parse_pattern_parameters {
 819         (my $text, my $depth, my %cfg) = @_;
 820
 821         print_debug($depth, "PARSE PARAMETERS $text");
 822
 823         my @parameters;
 824         my $current_parameter = '';
 825         my $level = 0;
 826         my $esc = 0;
 827
 828         while ($text ne '') {
 829                 my $ch = substr($text, 0, 1);
 830                 my $rest = substr($text, 1);
 831
 832                 if ($esc) {
 833                         $current_parameter .= $ch;
 834                         $esc = 0;
 835                 }
 836                 elsif ($ch eq '(') {
 837                         if ($level != 0) {
 838                                 $current_parameter .= $ch;
 839                         }
 840                         $level += 1;
 841                 }
 842                 else {
 843                         if ($level == 0) {
 844                                 last;
 845                         }
 846                         if ($ch eq ')') {
 847                                 $level -= 1;
 848                                 if ($level == 0) {
 849                                         print_debug($depth, "FOUND PARAMETER $current_parameter");
 850                                         $current_parameter = parse_value($current_parameter, $depth+1, %cfg);
 851                                         print_debug($depth, "ADD PARAMETER ".scalar(@parameters)." $current_parameter");
 852                                         push @parameters, $current_parameter;
 853                                         $current_parameter = '';
 854                                 }
 855                                 else {
 856                                         $current_parameter .= $ch;
 857                                 }
 858                         }
 859                         elsif ($ch eq '\\') {
 860                                 $current_parameter .= $ch;
 861                                 $esc = 1;
 862                         }
 863                         else {
 864                                 if (($level == 1) and ($ch eq ',')) {
 865                                         print_debug($depth, "FOUND PARAMETER $current_parameter");
 866                                         $current_parameter = parse_value($current_parameter, $depth+1, %cfg);
 867                                         print_debug($depth, "ADD PARAMETER ".scalar(@parameters)." $current_parameter");
 868                                         push @parameters, $current_parameter;
 869                                         $current_parameter = '';
 870                                 }
 871                                 else {
 872                                         $current_parameter .= $ch;
 873                                 }
 874                         }
 875                 }
 876                 $text = $rest;
 877                 if ($level == 0) {
 878                         last;
 879                 }
 880         };
 881         return @parameters;
 882 }
 883
 884 sub parse_pattern {
 885         (my $name, my $parameters, my $depth, my %cfg) = @_;
 886
 887         if ($depth >= MAX_DEPTH) {
 888                 print STDERR "Too deep.\n";
 889                 exit 3;
 890                 # return '@'.$name.$parameters;
 891                 # return '';
 892         }
 893
 894         print_debug($depth, "PARSE PATTERN $name: $parameters");
 895         my $return;
 896
 897         my @parameter_list = parse_pattern_parameters($parameters, $depth+1, %cfg);
 898
 899         my %subcfg;
 900         foreach my $id (keys %cfg) {
 901                 unless (($id eq '') or ($id =~ /^[0-9]+$/)) {
 902                         $subcfg{$id} = $cfg{$id};
 903                 }
 904         }
 905         $subcfg{'_'} = '';
 906         for (my $i = 0; defined $parameter_list[$i]; $i += 1) {
 907                 if ($i) {
 908                         $subcfg{'_'} .= ',';
 909                 }
 910                 $subcfg{'_'} .= $parameter_list[$i];
 911                 $subcfg{$i} = $parameter_list[$i];
 912         }
 913
 914         if (defined $cfg{$name}) {
 915                 my $to_parse = $cfg{$name};
 916                 my $parsed = '';
 917
 918                 while ($to_parse ne '') {
 919                         print_debug($depth, ":$to_parse");
 920                         if ($to_parse =~ /^\\(.)(.*)$/) {
 921                                 print_debug($depth, "UNESCAPE \\$1");
 922                                 $parsed .= unescape1ch($1);
 923                                 $to_parse = $2;
 924                         }
 925                         elsif ($to_parse =~ /^@([A-Za-z0-9_\-\.]+)(.*)$/) {
 926                                 my $id = $1;
 927                                 $to_parse = $2;
 928
 929                                 ($to_parse, my $parameters)= get_pattern_parameters($to_parse);
 930                                 print_debug($depth, "FUNCTION PATTERN $id: $parameters");
 931                                 $parsed .= parse_pattern($id, $parameters, $depth+1, %subcfg);
 932                         }
 933                         elsif ($to_parse =~ /^\$([A-Za-z0-9_\-\.]+)(.*)$/) {
 934                                 my $id = $1;
 935                                 $to_parse = $2;
 936
 937                                 ($to_parse, my $parameters)= get_pattern_parameters($to_parse); # ignored anyway
 938                                 print_debug($depth, "STATIC PATTERN $id: $parameters");
 939                                 $parsed .= get_pattern($id, $depth+1, %subcfg);
 940                         }
 941                         elsif ($to_parse =~ /^([^\$\\@]+)(([\$\\@].*)?)$/) {
 942                                 print_debug($depth, "TEXT $1");
 943                                 $parsed .= $1;
 944                                 $to_parse = $2;
 945                         }
 946                         else {
 947                                 my $ch = substr($to_parse, 0, 1);
 948                                 print_debug($depth, "TEXT $ch");
 949                                 $parsed .= $ch;
 950                                 $to_parse = substr($to_parse, 1);
 951                         }
 952                 }
 953                 $return = $parsed;
 954         }
 955         # elsif ($name =~ /^[0-9+]$/) {
 956                 # $return = '';
 957         # }
 958         elsif ($name eq ESCAPE) {
 959                 $return = escape(@parameter_list);
 960         }
 961         elsif ($name eq URL_ENCODE) {
 962                 $return = urlencode(@parameter_list);
 963         }
 964         elsif ($name eq HT_ENCODE) {
 965                 $return = entityencode(@parameter_list);
 966         }
 967         elsif ($name eq PATH) {
 968                 $return = join_path($cfg{PATH_SEPARATOR()},@parameter_list);
 969         }
 970         elsif ($name eq SPRINTF) {
 971                 my $format = shift @parameter_list;
 972                 $return = sprintf($format,@parameter_list);
 973         }
 974         else {
 975                 # return '@'.$name.$parameters;
 976                 $return = '';
 977         }
 978         print_debug($depth, "PARSED! PATTERN $return");
 979         return $return;
 980 }
 981
 982 sub get_pattern {
 983         (my $name, my $depth, my %cfg) = @_;
 984         my $value;
 985
 986         if (defined $cfg{$name}) {
 987                 $value = $cfg{$name}
 988         }
 989         else {
 990                 # $value = '$'.$name;
 991                 $value = '';
 992         }
 993         print_debug($depth, "GET PATTERN $name: $value");
 994         return $value;
 995 }
 996
 997 sub parse_value {
 998         (my $to_parse, my $depth, my %cfg) = @_;
 999
1000         if ($depth >= MAX_DEPTH) {
1001                 print STDERR "Too deep.\n";
1002                 exit 3;
1003                 # return $to_parse;
1004                 # return ''
1005         }
1006         my $parsed = '';
1007
1008         if ($to_parse =~ /^((([^#\\])|(\\.))*)#/) {
1009                 $to_parse = $1;
1010         }
1011         if ($to_parse =~ /^[ \t]*(([^ \t](.*[^ \t])?)?)[ \t]*$/) {
1012                 $to_parse = $1;
1013         }
1014
1015         print_debug($depth, "PARSE VALUE $to_parse");
1016
1017         while ($to_parse ne '') {
1018                 print_debug($depth, ":$to_parse");
1019                 # escape
1020                 if ($to_parse =~ /^\\(.)(.*)$/) {
1021                         print_debug($depth, "UNESCAPE \\$1");
1022                         $parsed .= unescape1ch($1);
1023                         $to_parse = $2;
1024                 }
1025                 elsif ($to_parse =~ /^@([A-Za-z0-9_\-\.]+)(.*)$/) {
1026                         my $name = $1;
1027                         $to_parse = $2;
1028
1029                         ($to_parse, my $parameters)= get_pattern_parameters($to_parse);
1030                         print_debug($depth, "FUNCTION PATTERN $name: $parameters");
1031                         $parsed .= parse_pattern($name, $parameters, $depth+1, %cfg);
1032                 }
1033                 elsif ($to_parse =~ /^\$([A-Za-z0-9_\-\.]+)(.*)$/) {
1034                         my $name = $1;
1035                         $to_parse = $2;
1036
1037                         ($to_parse, my $parameters)= get_pattern_parameters($to_parse); # ignored anyway
1038                         print_debug($depth, "STATIC PATTERN $name: $parameters");
1039                         $parsed .= get_pattern($name, $depth+1, %cfg)
1040                 }
1041                 elsif ($to_parse =~ /^([^\$\\@]+)(([\$\\@].*)?)$/) {
1042                         print_debug($depth, "TEXT $1");
1043                         $parsed .= $1;
1044                         $to_parse = $2;
1045                 }
1046                 else {
1047                         my $ch = substr($to_parse, 0, 1);
1048                         print_debug($depth, "TEXT $ch");
1049                         $parsed .= $ch;
1050                         $to_parse = substr($to_parse, 1);
1051                 }
1052         }
1053         print_debug($depth, "PARSED! VALUE $parsed");
1054         return $parsed;
1055 }
1056
1057 sub parse_file {
1058         (my $path, my $encoding, my $depth, my %cfg) = @_;
1059
1060         print_verbose($depth, 0, "$path");
1061
1062         if ($depth >= MAX_DEPTH) {
1063                 print STDERR "Too deep.\n";
1064                 exit 3;
1065         }
1066
1067         print_debug($depth, "PARSE FILE $path");
1068
1069         my $file;
1070         unless (open $file, "<:encoding($encoding)", encode('locale_fs', $path)) {
1071                 print STDERR "Cannot open configfile $path.\n";
1072                 exit 2;
1073         }
1074
1075         my $parse_mode = 0;
1076         my $name = '';
1077         my $value = '';
1078         my $if_depth = 0;
1079         my $if_block = 0;
1080
1081         while (defined(my $line = <$file>)) {
1082                 $line =~ s/[\r\n]//g;
1083
1084                 print_debug($depth, "LINE $line");
1085
1086                 # new definition name: value
1087                 if ($line =~ /^([A-Za-z0-9_\-\.]+)[ \t]*:[ \t](.*)$/) {
1088                         $parse_mode = 0;
1089                         unless ($if_block) {
1090                                 $name = $1;
1091                                 $value = $2;
1092                                 $cfg{$name} = '';
1093                                 print_debug($depth, "STATIC DEFINE $name=$value")
1094                         }
1095                 }
1096                 # new definition name = value
1097                 elsif ($line =~ /^([A-Za-z0-9_\-\.]+)[ \t]*=(.*)$/) {
1098                         $parse_mode = 1;
1099                         unless ($if_block) {
1100                                 $name = $1;
1101                                 $value = $2;
1102                                 $cfg{$name} = '';
1103                                 print_debug($depth, "DYNAMIC DEFINE $name=$value")
1104                         }
1105                 }
1106                 # continued definition
1107                 elsif ($line =~ /^[ \t](.*)$/) {
1108                         unless ($if_block) {
1109                                 $value = "\n".$1;
1110                                 print_debug($depth, "CONTINUE $name $value")
1111                         }
1112                 }
1113                 # include file
1114                 elsif ($line =~ /^include[ \t]+([^ \t](.*[^ \t])?)[ \t]*$/) {
1115                         unless ($if_block) {
1116                                 my $path = parse_value($1, $depth+1, %cfg);
1117                                 print_debug($depth, "INCLUDE $path");
1118                                 %cfg = parse_file($path, $encoding, $depth+1, %cfg);
1119                                 $name = '';
1120                                 $value = '';
1121                         }
1122                 }
1123                 # if
1124                 elsif ($line =~ /^if[ \t]+([^ \t](.*[^ \t])?)[ \t]*$/) {
1125                         $if_depth += 1;
1126                         unless ($if_block) {
1127                                 my $cond = parse_value($1, $depth+1, %cfg);
1128                                 print_debug($depth, "IF.$if_depth $cond");
1129                                 unless ($cond ne '') {
1130                                         $if_block = $if_depth;
1131                                 }
1132                         }
1133                         $name = '';
1134                         $value = '';
1135                 }
1136                 # endif
1137                 elsif ($line =~ /^endif([ \t].*)?$/) {
1138                         my $_if_depth = $if_depth;
1139                         if ($if_depth > 0) {
1140                                 $if_depth -= 1;
1141                         }
1142                         if ($if_depth < $if_block) {
1143                                 $if_block = 0;
1144                         }
1145                         unless ($if_block) {
1146                                 print_debug($depth, "ENDIF.$_if_depth");
1147                         }
1148                         $name = '';
1149                         $value = '';
1150                 }
1151                 # else
1152                 elsif ($line =~ /^else([ \t].*)?$/) {
1153                         unless ($if_block) {
1154                                 $if_block = $if_depth;
1155                                 print_debug($depth, "ELSE.$if_depth");
1156                         }
1157                         elsif ($if_depth == $if_block) {
1158                                 $if_block = 0;
1159                                 print_debug($depth, "ELSE.$if_depth");
1160                         }
1161                         $name = '';
1162                         $value = '';
1163                 }
1164                 # no el(s)if at this point
1165
1166                 # other line, not understood
1167                 else {
1168                         $name = '';
1169                         $value = '';
1170                 }
1171
1172                 if (($name ne '') and (not $if_block)) {
1173                         if ($parse_mode != 0) {
1174                                 $value = parse_value($value, $depth+1, %cfg);
1175                         }
1176                         $cfg{$name} .= $value;
1177                         print_verbose($depth, 1,  "$name: $value");
1178                         print_debug($depth, "ADD $name=$value")
1179                 }
1180         }
1181         close ($file);
1182
1183         return %cfg;
1184 }
1185
1186 sub convert_file {
1187         (my $in, my $out, my $encoding) = @_;
1188
1189         print_verbose(0, 0, "$in -> $out");
1190
1191         my $ref_in = ref($in);
1192         my $ref_out = ref($out);
1193
1194         if ($in eq '') {
1195                 $in = \*STDIN;
1196                 $ref_in = 1;
1197         }
1198         if ($out eq '') {
1199                 $out = \*STDOUT;
1200                 $ref_out = 1;
1201         }
1202
1203         unless ($ref_in) {
1204                 my $path = $in;
1205                 $in = undef;
1206                 unless (open $in, "<:encoding($encoding)", encode('locale_fs', $path)) {
1207                         print STDERR "Cannot open input file $path.\n";
1208                         exit 2;
1209                 }
1210         }
1211         unless ($ref_out) {
1212                 my $path = $out;
1213                 $out = undef;
1214                 unless (open $out, ">:encoding($encoding)", encode('locale_fs', $path)) {
1215                         print STDERR "Cannot open output file $path.\n";
1216                         unless ($ref_in) {
1217                                 close($in)
1218                         }
1219                         exit 2;
1220                 }
1221         }
1222
1223         # TODO: This is extremely inefficient. and does not scale well. Improve!
1224         my $status;
1225         LINE: while (defined(my $line = <$in>)) {
1226
1227                 while ($line =~ /$replace_line/gs) {
1228                         my $new_line = apply_pattern($1, $&, undef);
1229                         if (defined($new_line)) {
1230                                 print $out "$new_line\n";
1231                                 next LINE;
1232                         }
1233                 }
1234                 $line =~ s/[\r\n]//g;
1235                 $line =~ s/$replace_keyword/apply_pattern($1,$&,$&)/gse;
1236
1237                 print $out "$line\n";
1238         }
1239
1240         unless ($ref_out) {
1241                 close($out);
1242         }
1243         unless ($ref_in) {
1244                 close($in);
1245         }
1246 }