use Exporter;
-our $VERSION = '1.0.17';
+our $VERSION = '1.0.18';
our @ISA = qw(Exporter);
our @EXPORT = ();
our @EXPORT_OK = (
# sub join_url_host {
# (my $encoding, my @segments) = @_;
-
+ #
# my $host = '';
-
+ #
# foreach my $segment (@segments) {
# if ($host ne '') {
# $host = '.'.$host;
# }
-
############################
## HTML ENTITY ENCODING ##
############################
# in decimal form.
# only reserved characters will be escaped.
# $t is the text to encode
+# if $non_ascii is true then every non-ASCII character will be escaped
# if $all is true then every character will be escaped.
sub html_entity_encode_dec {
- (my $t, my $all) = @_;
+ (my $t, my $non_ascii, my $all) = @_;
- if ($all) {
- $t =~ s/(.)/html_entity_encode_1ch_dec($1)/egs;
- }
- else {
- $t =~ s/([\"\&<=>])/html_entity_encode_1ch_dec($1)/egs;
- }
+ $t =~ s/(.)/html_entity_encode_1ch_dec($1,$non_ascii,$all)/egs;
return $t;
}
# in hexadecimal form.
# only reserved characters will be escaped.
# $t is the text to encode
+# if $non_ascii is true then every non-ASCII character will be escaped
# if $all is true then every character will be escaped.
sub html_entity_encode_hex {
- (my $t, my $all) = @_;
+ (my $t, my $non_ascii, my $all) = @_;
- if ($all) {
- $t =~ s/(.)/html_entity_encode_1ch_hex($1)/egs;
- }
- else {
- $t =~ s/([\"\&<=>])/html_entity_encode_1ch_hex($1)/egs;
- }
+ $t =~ s/(.)/html_entity_encode_1ch_hex($1,$non_ascii,$all)/egs;
return $t;
}
# by name.
# only reserved characters will be escaped.
# $t is the text to encode
-# if $all is true then every character which has a name will be escaped,
-# and additionally if a character has more than 1 possible name, then
+# if $non_ascii is true then every non-ASCII character will be escaped
+# if $all is true then every character will be escaped.
+# if $fallback is 'dec' then character without name will be escaped in
+# decimal form instead
+# or if $fallback is 'hex' then character without name will be escaped
+# in hexadecimal form instead
+# or otherwise they will not be escaped
+# if a character has more than 1 possible name, then
# the name will be chosen unpredicatbly.
sub html_entity_encode_name {
- (my $t, my $all) = @_;
+ (my $t, my $non_ascii, my $all, my $fallback) = @_;
- if ($all) {
- $t =~ s/(.)/html_entity_encode_1ch_name($1,1)/egs;
- }
- else {
- $t =~ s/([\"\&<=>])/html_entity_encode_1ch_name($1,0)/egs;
- }
+ $t =~ s/(.)/html_entity_encode_1ch_name($1,$non_ascii,$all,$fallback)/egs;
return $t;
}
# html_entity_encode_dec() escapes a single character
# as an HTML entity reference in decimal form.
# $ch is the chatacter to encode
+# if $non_ascii is true then every non-ASCII character will be escaped
+# if $all is true then every character will be escaped.
sub html_entity_encode_1ch_dec {
- (my $ch) = @_;
+ (my $ch, my $non_ascii, my $all) = @_;
- $ch =~ s/(.)/sprintf('&#%02u;',ord($1))/egs;
- return $ch;
+ my $n = ord($ch);
+ if (
+ $all or
+ ($non_ascii and ($n > 0x7F)) or
+ ($ch =~ /[\"\&<=>]/)
+ ) {
+ return sprintf('&#%02u;', $n);
+ }
+ else {
+ return $ch;
+ }
}
# html_entity_encode_1ch_hex() escapes a single character
# as an HTML entity reference in hexadecimal form.
# $ch is the chatacter to encode
+# if $non_ascii is true then every non-ASCII character will be escaped
+# if $all is true then every character will be escaped.
sub html_entity_encode_1ch_hex {
- (my $ch) = @_;
+ (my $ch, my $non_ascii, my $all) = @_;
- $ch =~ s/(.)/sprintf('&#x%02X;',ord($1))/egs;
- return $ch;
+ my $n = ord($ch);
+ if (
+ $all or
+ ($non_ascii and ($n > 0x7F)) or
+ ($ch =~ /[\"\&<=>]/)
+ ) {
+ return sprintf('&#x%02X;', $n);
+ }
+ else {
+ return $ch;
+ }
}
# html_entity_encode_1ch_name() escapes a single character
# as an HTML entity reference by name.
# only a reserved character will be escaped.
# $ch is the chatacter to encode
-# if $all is true then any character which has a name will be escaped,
-# and additionally if the character has more than 1 possible name, then
+# if $non_ascii is true then every non-ASCII character will be escaped
+# if $all is true then every character will be escaped.
+# if $fallback is 'dec' then character without name will be escaped in
+# decimal form instead
+# or if $fallback is 'hex' then character without name will be escaped
+# in hexadecimal form instead
+# or otherwise they will not be escaped
+# if a character has more than 1 possible name, then
# the name will be chosen unpredicatbly.
sub html_entity_encode_1ch_name {
- (my $ch, my $all) = @_;
+ (my $ch, my $non_ascii, my $all, my $fallback) = @_;
+
+ my $n = ord($ch);
- unless ($all) {
+ if ($ch =~ /[\"\&<=>]/) {
if ($ch eq '"') {
return '"';
}
return $ch;
}
}
- else {
- my $n = ord($ch);
+ elsif (
+ $all or
+ ($non_ascii and ($n > 0x7F))
+ ) {
foreach my $name (keys %{+HTML_ENTITY_CODE}) {
my @code = @{HTML_ENTITY_CODE->{$name}};
if (($code[0] == $n) and (@code == 1)) {
return "&$name;";
}
}
- return $ch;
+
+ if ($fallback eq 'hex') {
+ return html_entity_encode_1ch_hex($ch, $non_ascii, $all);
+ }
+ elsif ($fallback eq 'dec') {
+ return html_entity_encode_1ch_dec($ch, $non_ascii, $all);
+ }
}
+
+ return $ch;
}
# html_entity_decode() decodes a text escaped by HTML entity references