From: b Date: Mon, 15 May 2023 21:45:52 +0000 (+0000) Subject: URL encoding X-Git-Tag: v1.0.3 X-Git-Url: http://bicyclesonthemoon.info/git-projects/?a=commitdiff_plain;h=6cc2a0295896e671fa71946ca6428f0c806a9a77;p=botm%2Fcommon-perl URL encoding --- diff --git a/botm_common.pm b/botm_common.pm index a2f7abd..300f00a 100644 --- a/botm_common.pm +++ b/botm_common.pm @@ -25,10 +25,14 @@ use Exporter; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); # vX.Y.Z: X YYYZZZ -$VERSION = 1.000002; +$VERSION = 1.000003; @ISA = qw(Exporter); @EXPORT = (); -@EXPORT_OK = qw(readdatafile writedatafile join_path); +@EXPORT_OK = ( + 'read_data_file', 'write_data_file', + 'url_encode', 'url_decode', + 'join_path' +); %EXPORT_TAGS = (); ################## @@ -66,7 +70,7 @@ $VERSION = 1.000002; # processing done. -# readdatafile() reads a data file and returns a hash with the data values. +# read_data_file() reads a data file and returns a hash with the data values. # # $file is the path to the file to read. # file will be opened, read, and closed. @@ -82,7 +86,7 @@ $VERSION = 1.000002; # # if $no_header is true then it is assumed that file contains just # the content and no header. -sub readdatafile { +sub read_data_file { (my $file, my $encoding, my $no_header) = @_; my $fh; my %data; @@ -168,19 +172,19 @@ sub readdatafile { # will be written, truncated to new size and not closed afterwards. # # $encoding is the text encoding of the file to write. -# if left empty, this will default to "utf8". +# if left empty, this will default to "UTF-8". # encoding of an already opened file will not be changed by this. # # if $no_header is true then only the content is written and # the header not. # # $data is the reference to the hash containing data to be written. 
-sub writedatafile {
+sub write_data_file {
     (my $file, my $encoding, my $no_header, my $data) = @_;
     my $fh;
 
     if ($encoding eq '') {
-        $encoding = 'utf8';
+        $encoding = 'UTF-8';
     }
 
     # check if $file is actually a path or maybe a filehandle
@@ -227,6 +231,103 @@ sub writedatafile {
 
 
 
+####################
+##  URL ENCODING  ##
+####################
+
+# See https://datatracker.ietf.org/doc/html/rfc3986
+# Here the url-encoding (percent encoding, URI escaping, ...) works
+# like this:
+# The text can consist of 3 types of characters:
+# - reserved characters:
+#     : / ? # [ ] @ ! $ & ' ( ) * + , ; =
+# - unreserved characters:
+#     A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+#     a b c d e f g h i j k l m n o p q r s t u v w x y z
+#     0 1 2 3 4 5 6 7 8 9 - . _ ~
+# - all other characters.
+# Unreserved characters are always allowed inside a URI
+# and don't need escaping.
+# Reserved characters might need escaping.
+# All other characters are not allowed and need escaping.
+#
+# A character to be escaped is encoded using a specified encoding
+# (the default choice being UTF-8)
+# and then each of the resulting bytes is represented as a 2 digit
+# hexadecimal number (case insensitive) preceded by the "%" character.
+
+# url_encode() escapes a text using URL-encoding and returns the result.
+# (any character which is not unreserved will be escaped.)
+# $t is the text to encode.
+# $encoding is the encoding to use
+# (if left empty, UTF-8 is assumed)
+# If $all is true then every character (unreserved too) will be escaped.
+sub url_encode {
+    (my $t, my $encoding, my $all) = @_;
+
+    if ($encoding eq '') {
+        $encoding = "UTF-8";
+    }
+
+    if ($all) {
+        $t =~ s/(.)/url_encode_1ch($1, $encoding)/egs; # bind to $t (not $_); /s so "." also matches "\n"
+    }
+    else {
+        $t =~ s/([^0-9A-Za-z.~\-_])/url_encode_1ch($1, $encoding)/eg;
+    }
+    return $t;
+}
+
+# url_encode_1ch() escapes a single character using URL-encoding.
+# $ch is the text to encode.
+# $encoding is the encoding to use (if left empty, UTF-8 is assumed)
+sub url_encode_1ch {
+    (my $ch, my $encoding) = @_;
+
+    if ($encoding eq '') {
+        $encoding = "UTF-8";
+    }
+    $ch = encode($encoding, $ch);
+    $ch =~ s/(.)/sprintf('%%%02X',ord($1))/egs; # /s: a newline byte must become %0A too
+    return $ch;
+}
+
+# url_decode() decodes a text escaped by URL-encoding and returns it.
+# $t is the text to decode.
+# $encoding is the encoding to use
+# (if left empty, utf8 is assumed)
+sub url_decode {
+    (my $t, my $encoding) = @_;
+
+    if ($encoding eq '') {
+        $encoding = "utf8";
+    }
+    $t =~ s/((?:%[0-9A-Fa-f]{2})+)/url_decode_xch($1, $encoding)/eg; # forward the caller's encoding
+    return $t;
+}
+
+# url_decode_xch() decodes a continuous run of "%XX" escapes into text.
+# $xch is the text to decode - it is assumed without checking that it
+# has this format. $encoding is the encoding to use (utf8 if empty).
+sub url_decode_xch {
+    (my $xch, my $encoding) = @_;
+    my $y = '';
+
+    if ($encoding eq '') {
+        $encoding = "utf8";
+    }
+    while ($xch ne '') {
+        $y .= chr(hex(substr($xch, 1, 2))); # skip the "%", take the 2 hex digits
+        $xch = substr($xch, 3);
+    }
+    $y = decode($encoding, $y);
+    return $y;
+}
+
+
+
+# path stuff
+
 # join_path() builds a path (or url) from individual segments
 # that there will be 1 path separator brtween (and not 2 or 0).
 sub join_path {