URL encoding

author b <rowerynaksiezycu@gmail.com>

Mon, 15 May 2023 21:45:52 +0000 (21:45 +0000)

committer b <rowerynaksiezycu@gmail.com>

Mon, 15 May 2023 21:45:52 +0000 (21:45 +0000)
author b <rowerynaksiezycu@gmail.com>
Mon, 15 May 2023 21:45:52 +0000 (21:45 +0000)
committer b <rowerynaksiezycu@gmail.com>
Mon, 15 May 2023 21:45:52 +0000 (21:45 +0000)
diff --git a/botm_common.pm b/botm_common.pm

index a2f7abd48405382c6d142f51894baf3f734222c2..300f00a36ba3f4b3b53590c59fc7877f9a342936 100644 (file)
--- a/botm_common.pm
+++ b/botm_common.pm
@@ -25,10 +25,14 @@ use Exporter;
  use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  
  # vX.Y.Z:      X YYYZZZ
-$VERSION     = 1.000002;
+$VERSION     = 1.000003;
  @ISA         = qw(Exporter);
  @EXPORT      = ();
-@EXPORT_OK   = qw(readdatafile writedatafile join_path);
+@EXPORT_OK   = (
+       'read_data_file', 'write_data_file',
+       'url_encode', 'url_decode',
+       'join_path'
+);
  %EXPORT_TAGS = ();
  
  ##################
@@ -66,7 +70,7 @@ $VERSION     = 1.000002;
  # processing done.
  
  
-# readdatafile() reads a data file and returns a hash with the data values.
+# read_data_file() reads a data file and returns a hash with the data values.
  #
  # $file is the path to the file to read.
  # file will be opened, read, and closed.
@@ -82,7 +86,7 @@ $VERSION     = 1.000002;
  #
  # if $no_header is true then it is assumed that file contains just
  # the content and no header.
-sub readdatafile {
+sub read_data_file {
         (my $file, my $encoding, my $no_header) = @_;
         my $fh;
         my %data;
@@ -168,19 +172,19 @@ sub readdatafile {
  # will be written, truncated to new size and not closed afterwards.
  #
  # $encoding is the text encoding of the file to write.
-# if left empty, this will default to "utf8".
+# if left empty, this will default to "UTF-8".
  # encoding of an already opened file will not be changed by this.
  #
  # if $no_header is true then only the content is written and
  # the header not.
  #
  # $data is the reference to the hash containing data to be written.
-sub writedatafile {
+sub write_data_file {
         (my $file, my $encoding, my $no_header, my $data) = @_;
         my $fh;
         
         if ($encoding eq '') {
-               $encoding = 'utf8';
+               $encoding = 'UTF-8';
         }
         
         # check if $file is actually a path or maybe a filehandle
@@ -227,6 +231,103 @@ sub writedatafile {
  
  
  
+####################
+##  URL ENCODING  ##
+####################
+
+# See https://datatracker.ietf.org/doc/html/rfc3986
+# Here the url-encoding (percent encoding, URI escaping, ...) works
+# like this:
+# The text can consist of 3 types of characters:
+#  - reserved characters:
+#    : / ? # [ ] @ ! $ & ' ( ) * + , ; =
+#  - unreserved characters:
+#    A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+#    a b c d e f g h i j k l m n o p q r s t u v w x y z
+#    0 1 2 3 4 5 6 7 8 9 - . _ ~
+#  - all other characters.
+# Unreserved characters are always allowed inside an URI
+# and don't need escaping.
+# Reserved characters might need escaping.
+# All other characters are not allowed and need escaping.
+#
+# A character to be escaped is encoded using a specified encoding
+# (the default choice being UTF-8)
+# and then each of the resulting bytes is represented as a 2 digit
+# hexadecimal number (case insensitive ) preceded by the "%" character.
+
+# url_encode() escapes a text using URL-encoding.
+# (any character which is not unreserved will be escaped.)
+# $t is the text to encode.
+# $encoding is the encoding to use
+# (if left empty, UTF-8 is assumed)
+# If $all is true then every character will be escaped
+sub url_encode {
+       (my $t, my $encoding, my $all) = @_;
+       
+       if ($encoding eq '') {
+               $encoding = "UTF-8";
+       }
+
+       if ($all) {
+               s/(.)/url_encode_1ch($1, $encoding)/eg;
+       }
+       else {
+               $t =~ s/([^0-9A-Za-z.~\-_])/url_encode_1ch($1, $encoding)/eg;
+       }
+       return $t;
+}
+
+# url_encode_1ch() escapes a single chatacter using URL-encoding.
+# $ch is the text to encode.
+# $encoding is the encoding to use
+sub url_encode_1ch {
+       (my $ch, my $encoding) = @_;
+       
+       if ($encoding eq '') {
+               $encoding = "UTF-8";
+       }
+       $ch = encode($encoding, $ch);
+       $ch =~ s/(.)/sprintf('%%%02X',ord($1))/eg;
+       return $ch;
+}
+
+# url_decode() decodes a text escaped by URL-encoding
+# $t is the text to decode.
+# $encoding is the encoding to use
+# (if left empty, utf8 is assumed)
+sub url_decode {
+       (my $t, my $encoding) = @_;
+       
+       if ($encoding eq '') {
+               $encoding = "utf8";
+       }
+       $t =~ s/((%[0-9A-Fa-f]{2})+)/url_decode_xch($1)/eg;
+       return $t;
+}
+
+# url_decode_xc() decodes a continuous string of characters escaped
+# $xch is the text to decode - it is assumed without checking
+# that the text is indeed in this format. 
+sub url_decode_xch {
+       (my $xch, my $encoding) = @_;
+       my $y = '';
+       
+       if ($encoding eq '') {
+               $encoding = "utf8";
+       }
+       while ($xch ne '') {
+               $y .= chr(hex(substr($xch, 1, 2)));
+               $xch = substr($xch, 3);
+       };
+       $y = decode($encoding, $y);
+       return $y;
+}
+
+
+
+# path stuff
+
  # join_path() builds a path (or url) from individual segments
  # that there will be 1 path separator brtween (and not 2 or 0).
  sub join_path {
author	b <rowerynaksiezycu@gmail.com>
	Mon, 15 May 2023 21:45:52 +0000 (21:45 +0000)
committer	b <rowerynaksiezycu@gmail.com>
	Mon, 15 May 2023 21:45:52 +0000 (21:45 +0000)