From: b <rowerynaksiezycu@gmail.com>
Date: Mon, 15 May 2023 21:45:52 +0000 (+0000)
Subject: URL encoding
X-Git-Tag: v1.0.3
X-Git-Url: http://bicyclesonthemoon.info/git-projects/?a=commitdiff_plain;h=6cc2a0295896e671fa71946ca6428f0c806a9a77;p=botm%2Fcommon-perl

URL encoding
---

diff --git a/botm_common.pm b/botm_common.pm
index a2f7abd..300f00a 100644
--- a/botm_common.pm
+++ b/botm_common.pm
@@ -25,10 +25,14 @@ use Exporter;
 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
 
 # vX.Y.Z:      X YYYZZZ
-$VERSION     = 1.000002;
+$VERSION     = 1.000003;
 @ISA         = qw(Exporter);
 @EXPORT      = ();
-@EXPORT_OK   = qw(readdatafile writedatafile join_path);
+@EXPORT_OK   = (
+	'read_data_file', 'write_data_file',
+	'url_encode', 'url_decode',
+	'join_path'
+);
 %EXPORT_TAGS = ();
 
 ##################
@@ -66,7 +70,7 @@ $VERSION     = 1.000002;
 # processing done.
 
 
-# readdatafile() reads a data file and returns a hash with the data values.
+# read_data_file() reads a data file and returns a hash with the data values.
 #
 # $file is the path to the file to read.
 # file will be opened, read, and closed.
@@ -82,7 +86,7 @@ $VERSION     = 1.000002;
 #
 # if $no_header is true then it is assumed that file contains just
 # the content and no header.
-sub readdatafile {
+sub read_data_file {
 	(my $file, my $encoding, my $no_header) = @_;
 	my $fh;
 	my %data;
@@ -168,19 +172,19 @@ sub readdatafile {
 # will be written, truncated to new size and not closed afterwards.
 #
 # $encoding is the text encoding of the file to write.
-# if left empty, this will default to "utf8".
+# if left empty, this will default to "UTF-8".
 # encoding of an already opened file will not be changed by this.
 #
 # if $no_header is true then only the content is written and
 # the header not.
 #
 # $data is the reference to the hash containing data to be written.
-sub writedatafile {
+sub write_data_file {
 	(my $file, my $encoding, my $no_header, my $data) = @_;
 	my $fh;
 	
 	if ($encoding eq '') {
-		$encoding = 'utf8';
+		$encoding = 'UTF-8';
 	}
 	
 	# check if $file is actually a path or maybe a filehandle
@@ -227,6 +231,103 @@ sub writedatafile {
 
 
 
+####################
+##  URL ENCODING  ##
+####################
+
+# See https://datatracker.ietf.org/doc/html/rfc3986
+# Here the url-encoding (percent encoding, URI escaping, ...) works
+# like this:
+# The text can consist of 3 types of characters:
+#  - reserved characters:
+#    : / ? # [ ] @ ! $ & ' ( ) * + , ; =
+#  - unreserved characters:
+#    A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+#    a b c d e f g h i j k l m n o p q r s t u v w x y z
+#    0 1 2 3 4 5 6 7 8 9 - . _ ~
+#  - all other characters.
+# Unreserved characters are always allowed inside an URI
+# and don't need escaping.
+# Reserved characters might need escaping.
+# All other characters are not allowed and need escaping.
+#
+# A character to be escaped is encoded using a specified encoding
+# (the default choice being UTF-8)
+# and then each of the resulting bytes is represented as a 2 digit
+# hexadecimal number (case insensitive ) preceded by the "%" character.
+
+# url_encode() escapes a text using URL-encoding.
+# (any character which is not unreserved will be escaped.)
+# $t is the text to encode.
+# $encoding is the encoding to use
+# (if left empty, UTF-8 is assumed)
+# If $all is true then every character will be escaped
+sub url_encode {
+	(my $t, my $encoding, my $all) = @_;
+	
+	if ($encoding eq '') {
+		$encoding = "UTF-8";
+	}
+
+	if ($all) {
+		s/(.)/url_encode_1ch($1, $encoding)/eg;
+	}
+	else {
+		$t =~ s/([^0-9A-Za-z.~\-_])/url_encode_1ch($1, $encoding)/eg;
+	}
+	return $t;
+}
+
+# url_encode_1ch() escapes a single chatacter using URL-encoding.
+# $ch is the text to encode.
+# $encoding is the encoding to use
+sub url_encode_1ch {
+	(my $ch, my $encoding) = @_;
+	
+	if ($encoding eq '') {
+		$encoding = "UTF-8";
+	}
+	$ch = encode($encoding, $ch);
+	$ch =~ s/(.)/sprintf('%%%02X',ord($1))/eg;
+	return $ch;
+}
+
+# url_decode() decodes a text escaped by URL-encoding
+# $t is the text to decode.
+# $encoding is the encoding to use
+# (if left empty, utf8 is assumed)
+sub url_decode {
+	(my $t, my $encoding) = @_;
+	
+	if ($encoding eq '') {
+		$encoding = "utf8";
+	}
+	$t =~ s/((%[0-9A-Fa-f]{2})+)/url_decode_xch($1)/eg;
+	return $t;
+}
+
+# url_decode_xc() decodes a continuous string of characters escaped
+# $xch is the text to decode - it is assumed without checking
+# that the text is indeed in this format. 
+sub url_decode_xch {
+	(my $xch, my $encoding) = @_;
+	my $y = '';
+	
+	if ($encoding eq '') {
+		$encoding = "utf8";
+	}
+	while ($xch ne '') {
+		$y .= chr(hex(substr($xch, 1, 2)));
+		$xch = substr($xch, 3);
+	};
+	$y = decode($encoding, $y);
+	return $y;
+}
+
+
+
+# path stuff
+
 # join_path() builds a path (or url) from individual segments
 # that there will be 1 path separator brtween (and not 2 or 0).
 sub join_path {