From 8ec5e0a239e1a4aedff48b4ef8c7c27e8bf921eb Mon Sep 17 00:00:00 2001 From: b Date: Mon, 29 May 2023 22:36:13 +0000 Subject: [PATCH] some attempt at encoding by name. --- botm_common.pm | 67 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/botm_common.pm b/botm_common.pm index e5c5f01..48d1b48 100644 --- a/botm_common.pm +++ b/botm_common.pm @@ -25,14 +25,14 @@ use Exporter; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); # vX.Y.Z: X YYYZZZ -$VERSION = 1.000005; +$VERSION = 1.000006; @ISA = qw(Exporter); @EXPORT = (); @EXPORT_OK = ( 'read_data_file', 'write_data_file', 'url_encode', 'url_decode', 'join_path', - 'html_entity_encode_dec', 'html_entity_encode_hex' + 'html_entity_encode_dec', 'html_entity_encode_hex', 'html_entity_encode_name' ); %EXPORT_TAGS = (); @@ -2587,60 +2587,89 @@ use constant HTML_ENTITY_CODE_INF => { sub html_entity_encode_dec { - # (my $t, my $encoding, my $all) = @_; (my $t, my $all) = @_; if ($all) { - # $t =~ s/(.)/html_entity_encode_1ch_dec($1, $encoding)/eg; $t =~ s/(.)/html_entity_encode_1ch_dec($1)/eg; } else { - # $t =~ s/([\"\&<=>])/html_entity_encode_1ch_dec($1, $encoding)/eg; $t =~ s/([\"\&<=>])/html_entity_encode_1ch_dec($1)/eg; } return $t; } sub html_entity_encode_hex { - # (my $t, my $encoding, my $all) = @_; (my $t, my $all) = @_; if ($all) { - # $t =~ s/(.)/html_entity_encode_1ch_hex($1, $encoding)/eg; $t =~ s/(.)/html_entity_encode_1ch_hex($1)/eg; } else { - # $t =~ s/([\"\&<=>])/html_entity_encode_1ch_hex($1, $encoding)/eg; $t =~ s/([\"\&<=>])/html_entity_encode_1ch_hex($1)/eg; } return $t; } +sub html_entity_encode_name { + (my $t, my $all) = @_; + + if ($all) { + $t =~ s/(.)/html_entity_encode_1ch_name($1,1)/eg; + } + else { + $t =~ s/([\"\&<=>])/html_entity_encode_1ch_name($1,0)/eg; + } +} + sub html_entity_encode_1ch_dec { - # (my $ch, my $encoding) = @_; (my $ch) = @_; - # if ($encoding ne '') { - # # escape byte values instead of code point value - # $ch = encode($encoding, $ch); - # } $ch =~ s/(.)/sprintf('&#%02u;',ord($1))/eg; return $ch; } sub html_entity_encode_1ch_hex { - # (my $ch, my $encoding) = @_; (my $ch) = @_; - # if ($encoding ne '') { - # # escape byte values instead of code point value - # $ch = encode($encoding, $ch); - # } $ch =~ s/(.)/sprintf('&#x%02X;',ord($1))/eg; return $ch; } -# TODO: +sub html_entity_encode_1ch_name { + (my $ch, my $all) = @_; + + unless ($all) { + if ($ch eq '"') { + return '"'; + } + elsif ($ch eq '&') { + return '&'; + } + elsif ($ch eq '<') { + return '<' + } + elsif ($ch eq '=') { + return '&equals'; + } + elsif ($ch eq '>') { + return '>'; + } + else { + return $ch; + } + } + else { + my $n = ord($ch); + foreach my $name (keys HTML_ENTITY_CODE) { + if (HTML_ENTITY_CODE->{$name} == [$n]) { + return "&$name;"; + } + } + return $ch; + } +} + + # html_entity_encode_name # html_entity_decode -- 2.30.2