'read_data_file', 'write_data_file',
'url_encode', 'url_decode',
'join_path',
- 'html_entity_encode_dec', 'html_entity_encode_hex', 'html_entity_encode_name'
+ 'html_entity_encode_dec', 'html_entity_encode_hex', 'html_entity_encode_name', 'html_entity_decode',
);
%EXPORT_TAGS = ();
}
$line =~ s/[\n]$//g;
+ $line =~ s/[\r]$//g;
# Empty line - end of header.
if ($line eq ''){
}
if ($all) {
- $t =~ s/(.)/url_encode_1ch($1, $encoding)/eg;
+ $t =~ s/(.)/url_encode_1ch($1, $encoding)/egs;
}
else {
- $t =~ s/([^0-9A-Za-z.~\-_])/url_encode_1ch($1, $encoding)/eg;
+ $t =~ s/([^0-9A-Za-z.~\-_])/url_encode_1ch($1, $encoding)/egs;
}
return $t;
}
# url_encode_1ch() escapes a single chatacter using URL-encoding.
-# $ch is the text to encode.
+# $ch is the chatacter to encode.
# $encoding is the encoding to use
sub url_encode_1ch {
(my $ch, my $encoding) = @_;
$encoding = "UTF-8";
}
$ch = encode($encoding, $ch);
- $ch =~ s/(.)/sprintf('%%%02X',ord($1))/eg;
+ $ch =~ s/(.)/sprintf('%%%02X',ord($1))/egs;
return $ch;
}
if ($encoding eq '') {
$encoding = "utf8";
}
- $t =~ s/((%[0-9A-Fa-f]{2})+)/url_decode_xch($1)/eg;
+ $t =~ s/((%[0-9A-Fa-f]{2})+)/url_decode_xch($1)/egs;
return $t;
}
-# url_decode_xc() decodes a continuous string of characters escaped
+# url_decode_xch() decodes a continuous string of characters escaped
# $xch is the text to decode - it is assumed without checking
# that the text is indeed in this format.
sub url_decode_xch {
'yuml' => [0x000FF]
};
-
+# html_entity_encode_dec() escapes a text using HTML entity references
+# in decimal form.
+# only reserved characters will be escaped.
+# $t is the text to encode
+# if $all is true then every character will be escaped.
sub html_entity_encode_dec {
(my $t, my $all) = @_;
if ($all) {
- $t =~ s/(.)/html_entity_encode_1ch_dec($1)/eg;
+ $t =~ s/(.)/html_entity_encode_1ch_dec($1)/egs;
}
else {
- $t =~ s/([\"\&<=>])/html_entity_encode_1ch_dec($1)/eg;
+ $t =~ s/([\"\&<=>])/html_entity_encode_1ch_dec($1)/egs;
}
return $t;
}
+# html_entity_encode_hex() escapes a text using HTML entity references
+# in hexadecimal form.
+# only reserved characters will be escaped.
+# $t is the text to encode
+# if $all is true then every character will be escaped.
sub html_entity_encode_hex {
(my $t, my $all) = @_;
if ($all) {
- $t =~ s/(.)/html_entity_encode_1ch_hex($1)/eg;
+ $t =~ s/(.)/html_entity_encode_1ch_hex($1)/egs;
}
else {
- $t =~ s/([\"\&<=>])/html_entity_encode_1ch_hex($1)/eg;
+ $t =~ s/([\"\&<=>])/html_entity_encode_1ch_hex($1)/egs;
}
return $t;
}
+# html_entity_encode_name() escapes a text using HTML entity references
+# by name.
+# only reserved characters will be escaped.
+# $t is the text to encode
+# if $all is true then every character which has a name will be escaped,
+# and additionally if a character has more than 1 possible name, then
+# the name will be chosen unpredicatbly.
sub html_entity_encode_name {
(my $t, my $all) = @_;
if ($all) {
- $t =~ s/(.)/html_entity_encode_1ch_name($1,1)/eg;
+ $t =~ s/(.)/html_entity_encode_1ch_name($1,1)/egs;
}
else {
- $t =~ s/([\"\&<=>])/html_entity_encode_1ch_name($1,0)/eg;
+ $t =~ s/([\"\&<=>])/html_entity_encode_1ch_name($1,0)/egs;
}
return $t;
}
+# html_entity_encode_dec() escapes a single character
+# as an HTML entity reference in decimal form.
+# $ch is the chatacter to encode
sub html_entity_encode_1ch_dec {
(my $ch) = @_;
- $ch =~ s/(.)/sprintf('&#%02u;',ord($1))/eg;
+ $ch =~ s/(.)/sprintf('&#%02u;',ord($1))/egs;
return $ch;
}
+# html_entity_encode_1ch_hex() escapes a single character
+# as an HTML entity reference in hexadecimal form.
+# $ch is the chatacter to encode
sub html_entity_encode_1ch_hex {
(my $ch) = @_;
- $ch =~ s/(.)/sprintf('&#x%02X;',ord($1))/eg;
+ $ch =~ s/(.)/sprintf('&#x%02X;',ord($1))/egs;
return $ch;
}
+# html_entity_encode_1ch_name() escapes a single character
+# as an HTML entity reference by name.
+# only a reserved character will be escaped.
+# $ch is the chatacter to encode
+# if $all is true then any character which has a name will be escaped,
+# and additionally if the character has more than 1 possible name, then
+# the name will be chosen unpredicatbly.
sub html_entity_encode_1ch_name {
(my $ch, my $all) = @_;
}
}
+# html_entity_decode() decodes a text escaped by HTML entity references
+# in any of the available forms.
+# invalid entity references will be left unchanged.
+# $t is the text to decode
+sub html_entity_decode {
+ (my $t) = @_;
+
+ my $d = '';
+
+ while ($t ne '') {
+ if ($t =~ /^([^\&]+)(\&.*)$/s) { # any normal text
+ $d .= $1;
+ $t = $2;
+ }
+ elsif ($t =~ /^(\&#?[A-Za-z0-9]+;)(.*)$/s) { # correct encoded character
+ $d .= html_entity_decode_1en($1);
+ $t = $2;
+ }
+ elsif ($t =~ /^(\&[A-Za-z0-9]+)(.*)$/s) { # encoded character without ";"
+ $d .= html_entity_decode_1en($1);
+ $t = $2;
+ }
+ elsif ($t =~ /^(\&)(.*)$/s) { # invalid "&"
+ $d .= $1;
+ $t = $2;
+ }
+ else { # nothing left to decode
+ $d .= $t;
+ $t = '';
+ }
+ }
+
+ return $d;
+}
-# html_entity_encode_name
-# html_entity_decode
+# html_entity_decode_1en() decodes a single HTML entity reference
+# in any of the available forms.
+# an invalid entity reference will be left unchanged.
+# $en is the text to decode - it is assumed without checking
+# that the text is indeed in this format.
+sub html_entity_decode_1en {
+ (my $en) = @_;
+ my $y = '';
+
+ if ($en !~ /;$/) { # name without ";"
+ my $n = substr($en, 1);
+ # we HAVE TO iterate :/
+ foreach my $name (keys %{+HTML_ENTITY_CODE_INF}) {
+ if (index($n, $name) == 0) { # name (beginning of entire string) is valid:
+ # decode
+ my @code = @{HTML_ENTITY_CODE_INF->{$name}};
+ foreach my $c (@code) {
+ $y .= chr($c);
+ }
+ # rest of the string, insert literally
+ $y .= substr($n, length($name));
+ return $y;
+ }
+ }
+ # invalid name, insert literally
+ return $en;
+ }
+ elsif ($en =~ /^\&#([0-9]+);$/) { # decimal
+ return chr($1);
+ }
+ elsif ($en =~ /^\&#x([0-9A-Fa-f]+);$/) { # hexadecimal
+ return chr(hex($1));
+ }
+ else { # name
+ my $name = substr($en, 1, length($en) - 2);
+ if (defined HTML_ENTITY_CODE->{$name}) {
+ # valid name, decode
+ my @code = @{HTML_ENTITY_CODE->{$name}};
+ foreach my $c (@code) {
+ $y .= chr($c);
+ }
+ return $y;
+ }
+ else {
+ # invalid name, insert literally
+ return $en;
+ }
+ }
+}