use Exporter;
-our $VERSION = '1.1.2';
+our $VERSION = '1.1.3';
our @ISA = qw(Exporter);
our @EXPORT = ();
our @EXPORT_OK = (
'unassigned' => 509,
'not_extended' => 510,
'network_authentication required' => 511,
-
};
sub http_header_line {
# If $all is true then every character will be escaped
sub url_encode {
(my $t, my $encoding, my $all) = @_;
-
if ($encoding eq '') {
$encoding = 'UTF-8';
}
-
if ($all) {
- $t =~ s/(.)/url_encode_1ch($1, $encoding)/egs;
+ $t =~ s/./url_encode_1ch($&,$encoding)/egs;
}
else {
- $t =~ s/([^0-9A-Za-z.~\-_])/url_encode_1ch($1, $encoding)/egs;
+ $t =~ s/[^0-9A-Za-z.~\-_]/url_encode_1ch($&,$encoding)/egs;
}
return $t;
}
# $encoding is the encoding to use
sub url_encode_1ch {
(my $ch, my $encoding) = @_;
-
- if ($encoding eq '') {
- $encoding = 'UTF-8';
- }
$ch = encode($encoding, $ch);
- $ch =~ s/(.)/sprintf('%%%02X',ord($1))/egs;
+ $ch =~ s/./sprintf('%%%02X',ord($&))/egs;
return $ch;
}
# if $plus is true then '+' will be decoded to ' '.
sub url_decode {
(my $t, my $encoding, my $plus) = @_;
-
if ($encoding eq '') {
$encoding = 'utf8';
}
if ($plus) {
$t =~ s/\+/ /gs;
}
- $t =~ s/((%[0-9A-Fa-f]{2})+)/url_decode_xch($1, $encoding)/egs;
+ $t =~ s/(%[0-9A-Fa-f]{2})+/url_decode_xch($&,$encoding)/egs;
return $t;
}
sub url_decode_xch {
(my $xch, my $encoding) = @_;
my $y = '';
-
- if ($encoding eq '') {
- $encoding = 'utf8';
- }
while ($xch ne '') {
$y .= chr(hex(substr($xch, 1, 2)));
$xch = substr($xch, 3);
'yen' => [0x000A5],
'yuml' => [0x000FF]
};
+# here a reverse map: from code to name, but only HTML 4.01
+# See https://www.w3.org/TR/1999/PR-html40-19990824/sgml/entities.html
+use constant HTML_ENTITY_NAME => {
+ 0x0022 =>'quot',
+ 0x0026 =>'amp',
+ 0x003C =>'lt',
+ 0x003E =>'gt',
+ 0x00A0 =>'nbsp',
+ 0x00A1 =>'iexcl',
+ 0x00A2 =>'cent',
+ 0x00A3 =>'pound',
+ 0x00A4 =>'curren',
+ 0x00A5 =>'yen',
+ 0x00A6 =>'brvbar',
+ 0x00A7 =>'sect',
+ 0x00A8 =>'uml',
+ 0x00A9 =>'copy',
+ 0x00AA =>'ordf',
+ 0x00AB =>'laquo',
+ 0x00AC =>'not',
+ 0x00AD =>'shy',
+ 0x00AE =>'reg',
+ 0x00AF =>'macr',
+ 0x00B0 =>'deg',
+ 0x00B1 =>'plusmn',
+ 0x00B2 =>'sup2',
+ 0x00B3 =>'sup3',
+ 0x00B4 =>'acute',
+ 0x00B5 =>'micro',
+ 0x00B6 =>'para',
+ 0x00B7 =>'middot',
+ 0x00B8 =>'cedil',
+ 0x00B9 =>'sup1',
+ 0x00BA =>'ordm',
+ 0x00BB =>'raquo',
+ 0x00BC =>'frac14',
+ 0x00BD =>'frac12',
+ 0x00BE =>'frac34',
+ 0x00BF =>'iquest',
+ 0x00C0 =>'Agrave',
+ 0x00C1 =>'Aacute',
+ 0x00C2 =>'Acirc',
+ 0x00C3 =>'Atilde',
+ 0x00C4 =>'Auml',
+ 0x00C5 =>'Aring',
+ 0x00C6 =>'AElig',
+ 0x00C7 =>'Ccedil',
+ 0x00C8 =>'Egrave',
+ 0x00C9 =>'Eacute',
+ 0x00CA =>'Ecirc',
+ 0x00CB =>'Euml',
+ 0x00CC =>'Igrave',
+ 0x00CD =>'Iacute',
+ 0x00CE =>'Icirc',
+ 0x00CF =>'Iuml',
+ 0x00D0 =>'ETH',
+ 0x00D1 =>'Ntilde',
+ 0x00D2 =>'Ograve',
+ 0x00D3 =>'Oacute',
+ 0x00D4 =>'Ocirc',
+ 0x00D5 =>'Otilde',
+ 0x00D6 =>'Ouml',
+ 0x00D7 =>'times',
+ 0x00D8 =>'Oslash',
+ 0x00D9 =>'Ugrave',
+ 0x00DA =>'Uacute',
+ 0x00DB =>'Ucirc',
+ 0x00DC =>'Uuml',
+ 0x00DD =>'Yacute',
+ 0x00DE =>'THORN',
+ 0x00DF =>'szlig',
+ 0x00E0 =>'agrave',
+ 0x00E1 =>'aacute',
+ 0x00E2 =>'acirc',
+ 0x00E3 =>'atilde',
+ 0x00E4 =>'auml',
+ 0x00E5 =>'aring',
+ 0x00E6 =>'aelig',
+ 0x00E7 =>'ccedil',
+ 0x00E8 =>'egrave',
+ 0x00E9 =>'eacute',
+ 0x00EA =>'ecirc',
+ 0x00EB =>'euml',
+ 0x00EC =>'igrave',
+ 0x00ED =>'iacute',
+ 0x00EE =>'icirc',
+ 0x00EF =>'iuml',
+ 0x00F0 =>'eth',
+ 0x00F1 =>'ntilde',
+ 0x00F2 =>'ograve',
+ 0x00F3 =>'oacute',
+ 0x00F4 =>'ocirc',
+ 0x00F5 =>'otilde',
+ 0x00F6 =>'ouml',
+ 0x00F7 =>'divide',
+ 0x00F8 =>'oslash',
+ 0x00F9 =>'ugrave',
+ 0x00FA =>'uacute',
+ 0x00FB =>'ucirc',
+ 0x00FC =>'uuml',
+ 0x00FD =>'yacute',
+ 0x00FE =>'thorn',
+ 0x00FF =>'yuml',
+ 0x0152 =>'OElig',
+ 0x0153 =>'oelig',
+ 0x0160 =>'Scaron',
+ 0x0161 =>'scaron',
+ 0x0178 =>'Yuml',
+ 0x0192 =>'fnof',
+ 0x02C6 =>'circ',
+ 0x02DC =>'tilde',
+ 0x0391 =>'Alpha',
+ 0x0392 =>'Beta',
+ 0x0393 =>'Gamma',
+ 0x0394 =>'Delta',
+ 0x0395 =>'Epsilon',
+ 0x0396 =>'Zeta',
+ 0x0397 =>'Eta',
+ 0x0398 =>'Theta',
+ 0x0399 =>'Iota',
+ 0x039A =>'Kappa',
+ 0x039B =>'Lambda',
+ 0x039C =>'Mu',
+ 0x039D =>'Nu',
+ 0x039E =>'Xi',
+ 0x039F =>'Omicron',
+ 0x03A0 =>'Pi',
+ 0x03A1 =>'Rho',
+ 0x03A3 =>'Sigma',
+ 0x03A4 =>'Tau',
+ 0x03A5 =>'Upsilon',
+ 0x03A6 =>'Phi',
+ 0x03A7 =>'Chi',
+ 0x03A8 =>'Psi',
+ 0x03A9 =>'Omega',
+ 0x03B1 =>'alpha',
+ 0x03B2 =>'beta',
+ 0x03B3 =>'gamma',
+ 0x03B4 =>'delta',
+ 0x03B5 =>'epsilon',
+ 0x03B6 =>'zeta',
+ 0x03B7 =>'eta',
+ 0x03B8 =>'theta',
+ 0x03B9 =>'iota',
+ 0x03BA =>'kappa',
+ 0x03BB =>'lambda',
+ 0x03BC =>'mu',
+ 0x03BD =>'nu',
+ 0x03BE =>'xi',
+ 0x03BF =>'omicron',
+ 0x03C0 =>'pi',
+ 0x03C1 =>'rho',
+ 0x03C2 =>'sigmaf',
+ 0x03C3 =>'sigma',
+ 0x03C4 =>'tau',
+ 0x03C5 =>'upsilon',
+ 0x03C6 =>'phi',
+ 0x03C7 =>'chi',
+ 0x03C8 =>'psi',
+ 0x03C9 =>'omega',
+ 0x03D1 =>'thetasym',
+ 0x03D2 =>'upsih',
+ 0x03D6 =>'piv',
+ 0x2002 =>'ensp',
+ 0x2003 =>'emsp',
+ 0x2009 =>'thinsp',
+ 0x200C =>'zwnj',
+ 0x200D =>'zwj',
+ 0x200E =>'lrm',
+ 0x200F =>'rlm',
+ 0x2013 =>'ndash',
+ 0x2014 =>'mdash',
+ 0x2018 =>'lsquo',
+ 0x2019 =>'rsquo',
+ 0x201A =>'sbquo',
+ 0x201C =>'ldquo',
+ 0x201D =>'rdquo',
+ 0x201E =>'bdquo',
+ 0x2020 =>'dagger',
+ 0x2021 =>'Dagger',
+ 0x2022 =>'bull',
+ 0x2026 =>'hellip',
+ 0x2030 =>'permil',
+ 0x2032 =>'prime',
+ 0x2033 =>'Prime',
+ 0x2039 =>'lsaquo',
+ 0x203A =>'rsaquo',
+ 0x203E =>'oline',
+ 0x2044 =>'frasl',
+ 0x20AC =>'euro',
+ 0x2111 =>'image',
+ 0x2118 =>'weierp',
+ 0x211C =>'real',
+ 0x2122 =>'trade',
+ 0x2135 =>'alefsym',
+ 0x2190 =>'larr',
+ 0x2191 =>'uarr',
+ 0x2192 =>'rarr',
+ 0x2193 =>'darr',
+ 0x2194 =>'harr',
+ 0x21B5 =>'crarr',
+ 0x21D0 =>'lArr',
+ 0x21D1 =>'uArr',
+ 0x21D2 =>'rArr',
+ 0x21D3 =>'dArr',
+ 0x21D4 =>'hArr',
+ 0x2200 =>'forall',
+ 0x2202 =>'part',
+ 0x2203 =>'exist',
+ 0x2205 =>'empty',
+ 0x2207 =>'nabla',
+ 0x2208 =>'isin',
+ 0x2209 =>'notin',
+ 0x220B =>'ni',
+ 0x220F =>'prod',
+ 0x2211 =>'sum',
+ 0x2212 =>'minus',
+ 0x2217 =>'lowast',
+ 0x221A =>'radic',
+ 0x221D =>'prop',
+ 0x221E =>'infin',
+ 0x2220 =>'ang',
+ 0x2227 =>'and',
+ 0x2228 =>'or',
+ 0x2229 =>'cap',
+ 0x222A =>'cup',
+ 0x222B =>'int',
+ 0x2234 =>'there4',
+ 0x223C =>'sim',
+ 0x2245 =>'cong',
+ 0x2248 =>'asymp',
+ 0x2260 =>'ne',
+ 0x2261 =>'equiv',
+ 0x2264 =>'le',
+ 0x2265 =>'ge',
+ 0x2282 =>'sub',
+ 0x2283 =>'sup',
+ 0x2284 =>'nsub',
+ 0x2286 =>'sube',
+ 0x2287 =>'supe',
+ 0x2295 =>'oplus',
+ 0x2295 =>'otimes',
+ 0x22A5 =>'perp',
+ 0x22C5 =>'sdot',
+ 0x2308 =>'lceil',
+ 0x230A =>'lfloor',
+ 0x230B =>'rfloor',
+ 0x2329 =>'lang',
+ 0x232A =>'rang',
+ 0x25CA =>'loz',
+ 0x2660 =>'spades',
+ 0x2663 =>'clubs',
+ 0x2665 =>'hearts',
+ 0x2666 =>'diams'
+};
+
# html_entity_encode_dec() escapes a text using HTML entity references
# in decimal form.
-# only reserved characters will be escaped.
+# reserved characters will be escaped.
# $t is the text to encode
# if $non_ascii is true then every non-ASCII character will be escaped
# if $all is true then every character will be escaped.
sub html_entity_encode_dec {
(my $t, my $non_ascii, my $all) = @_;
-
- $t =~ s/(.)/html_entity_encode_1ch_dec($1,$non_ascii,$all)/egs;
+ if ($all) {
+ $t =~ s/./'&#'.ord($&).';'/egs;
+ }
+ elsif ($non_ascii) {
+ $t =~ s/[\"\&<=>\P{ASCII}]/'&#'.ord($&).';'/egs;
+ }
+ else {
+ $t =~ s/[\"\&<=>]/'&#'.ord($&).';'/egs;
+ }
return $t;
}
# html_entity_encode_hex() escapes a text using HTML entity references
# in hexadecimal form.
-# only reserved characters will be escaped.
+# reserved characters will be escaped.
# $t is the text to encode
# if $non_ascii is true then every non-ASCII character will be escaped
# if $all is true then every character will be escaped.
sub html_entity_encode_hex {
(my $t, my $non_ascii, my $all) = @_;
-
- $t =~ s/(.)/html_entity_encode_1ch_hex($1,$non_ascii,$all)/egs;
+ if ($all) {
+ $t =~ s/./sprintf('&#x%X;',ord($&))/egs;
+ }
+ elsif ($non_ascii) {
+ $t =~ s/[\"\&<=>\P{ASCII}]/sprintf('&#x%X;',ord($&))/egs;
+ }
+ else {
+ $t =~ s/[\"\&<=>]/sprintf('&#x%X;',ord($&))/egs;
+ }
return $t;
}
# html_entity_encode_name() escapes a text using HTML entity references
# by name.
-# only reserved characters will be escaped.
+# reserved characters will be escaped.
# $t is the text to encode
# if $non_ascii is true then every non-ASCII character will be escaped
# if $all is true then every character will be escaped.
# or if $fallback is 'hex' then character without name will be escaped
# in hexadecimal form instead
# or otherwise they will not be escaped
-# if a character has more than 1 possible name, then
-# the name will be chosen unpredicatbly.
sub html_entity_encode_name {
(my $t, my $non_ascii, my $all, my $fallback) = @_;
- $t =~ s/(.)/html_entity_encode_1ch_name($1,$non_ascii,$all,$fallback)/egs;
- return $t;
-}
-
-# html_entity_encode_dec() escapes a single character
-# as an HTML entity reference in decimal form.
-# $ch is the chatacter to encode
-# if $non_ascii is true then every non-ASCII character will be escaped
-# if $all is true then every character will be escaped.
-sub html_entity_encode_1ch_dec {
- (my $ch, my $non_ascii, my $all) = @_;
-
- my $n = ord($ch);
- if (
- $all or
- ($non_ascii and ($n > 0x7F)) or
- ($ch =~ /[\"\&<=>]/)
- ) {
- return sprintf('&#%02u;', $n);
- }
- else {
- return $ch;
+ if ($all) {
+ $t =~ s/./html_entity_encode_1ch_name($&,$fallback)/egs;
}
-}
-
-# html_entity_encode_1ch_hex() escapes a single character
-# as an HTML entity reference in hexadecimal form.
-# $ch is the chatacter to encode
-# if $non_ascii is true then every non-ASCII character will be escaped
-# if $all is true then every character will be escaped.
-sub html_entity_encode_1ch_hex {
- (my $ch, my $non_ascii, my $all) = @_;
-
- my $n = ord($ch);
- if (
- $all or
- ($non_ascii and ($n > 0x7F)) or
- ($ch =~ /[\"\&<=>]/)
- ) {
- return sprintf('&#x%02X;', $n);
+ elsif ($non_ascii) {
+ $t =~ s/[\"\&<=>\P{ASCII}]/html_entity_encode_1ch_name($&,$fallback)/egs;
}
else {
- return $ch;
+ $t =~ s/[\"\&<=>]/html_entity_encode_1ch_name($&,$fallback)/egs;
}
+ return $t;
}
# html_entity_encode_1ch_name() escapes a single character
# as an HTML entity reference by name.
-# only a reserved character will be escaped.
# $ch is the chatacter to encode
-# if $non_ascii is true then every non-ASCII character will be escaped
-# if $all is true then every character will be escaped.
# if $fallback is 'dec' then character without name will be escaped in
# decimal form instead
# or if $fallback is 'hex' then character without name will be escaped
# in hexadecimal form instead
# or otherwise they will not be escaped
-# if a character has more than 1 possible name, then
-# the name will be chosen unpredicatbly.
sub html_entity_encode_1ch_name {
- (my $ch, my $non_ascii, my $all, my $fallback) = @_;
-
+ (my $ch, my $fallback) = @_;
my $n = ord($ch);
-
- if ($ch =~ /[\"\&<=>]/) {
- if ($ch eq '"') {
- return '"';
- }
- elsif ($ch eq '&') {
- return '&';
- }
- elsif ($ch eq '<') {
- return '<';
- }
- elsif ($ch eq '=') {
- return '=';
- }
- elsif ($ch eq '>') {
- return '>';
- }
- else {
- return $ch;
- }
+ my $name = HTML_ENTITY_NAME->{$n};
+ if ($name ne '') {
+ return "&$name;";
}
- elsif (
- $all or
- ($non_ascii and ($n > 0x7F))
- ) {
- foreach my $name (keys %{+HTML_ENTITY_CODE}) {
- my @code = @{HTML_ENTITY_CODE->{$name}};
- if (($code[0] == $n) and (@code == 1)) {
- return "&$name;";
- }
- }
-
- if ($fallback eq 'hex') {
- return html_entity_encode_1ch_hex($ch, $non_ascii, $all);
- }
- elsif ($fallback eq 'dec') {
- return html_entity_encode_1ch_dec($ch, $non_ascii, $all);
- }
+ elsif ($fallback eq 'dec') {
+ return "&#$n;";
+ }
+ elsif ($fallback eq 'hex') {
+ return sprintf('&#x%X;', $n);
+ }
+ else {
+ return $ch;
}
-
- return $ch;
}
# html_entity_decode() decodes a text escaped by HTML entity references
# $t is the text to decode
sub html_entity_decode {
(my $t) = @_;
-
- my $d = '';
-
- while ($t ne '') {
- if ($t =~ /^([^\&]+)(\&.*)$/s) { # any normal text
- $d .= $1;
- $t = $2;
- }
- elsif ($t =~ /^\&#?[A-Za-z0-9]+;/s) { # correct encoded character
- $d .= html_entity_decode_1en($&);
- $t = $';
- }
- elsif ($t =~ /^\&[A-Za-z0-9]+/s) { # encoded character without ";"
- $d .= html_entity_decode_1en($&);
- $t = $';
- }
- elsif ($t =~ /^\&/s) { # invalid "&"
- $d .= $&;
- $t = $';
- }
- else { # nothing left to decode
- $d .= $t;
- $t = '';
- }
- }
-
- return $d;
+ $t =~ s/&((#(([0-9]+)|(x[0-9A-Fa-f]+));)|([A-Za-z0-9]+;?))/html_entity_decode_1en($&)/egs;
+ return $t;
}
# html_entity_decode_1en() decodes a single HTML entity reference
# in any of the available forms.
# an invalid entity reference will be left unchanged.
-# $en is the text to decode - it is assumed without checking
-# that the text is indeed in this format.
+# $en is the text to decode
sub html_entity_decode_1en {
(my $en) = @_;
- my $y = '';
-
- if ($en !~ /;$/) { # name without ";"
- my $n = substr($en, 1);
- # we HAVE TO iterate as we don't know where name terminates :/
- # why they thought this is a good idea ?
- foreach my $name (keys %{+HTML_ENTITY_CODE_INF}) {
- if (index($n, $name) == 0) { # name (beginning of entire string) is valid:
- # decode
- my @code = @{HTML_ENTITY_CODE_INF->{$name}};
- foreach my $c (@code) {
- $y .= chr($c);
- }
- # rest of the string, insert literally
- $y .= substr($n, length($name));
- return $y;
- }
- }
- # invalid name, insert literally
- return $en;
- }
- elsif ($en =~ /^\&#([0-9]+);$/) { # decimal
+ if ($en =~ /^&#([0-9]+);$/) {
return chr($1);
}
- elsif ($en =~ /^\&#x([0-9A-Fa-f]+);$/) { # hexadecimal
+ elsif ($en =~ /^&#x([0-9A-Fa-f]+);$/) {
return chr(hex($1));
}
- else { # name
- my $name = substr($en, 1, length($en) - 2);
- if (defined HTML_ENTITY_CODE->{$name}) {
- # valid name, decode
- my @code = @{HTML_ENTITY_CODE->{$name}};
- foreach my $c (@code) {
- $y .= chr($c);
+ elsif ($en =~ /^&([A-Za-z0-9]+)(;?)$/) {
+ my $y = '';
+ if ($2 ne '') {
+ if (defined HTML_ENTITY_CODE->{$1}) {
+ my @code = @{HTML_ENTITY_CODE->{$1}};
+ foreach my $c (@code) {
+ $y .= chr($c);
+ }
+ return $y;
}
- return $y;
}
else {
- # invalid name, insert literally
- return $en;
+ # we HAVE TO iterate as we don't know where name terminates :/
+ # why they thought this is a good idea ?
+ foreach my $name (keys %{+HTML_ENTITY_CODE_INF}) {
+ if (index($1, $name) == 0) { # name (beginning of entire string) is valid:
+ # decode
+ my @code = @{HTML_ENTITY_CODE_INF->{$name}};
+ foreach my $c (@code) {
+ $y .= chr($c);
+ }
+ # rest of the string, insert literally
+ $y .= substr($1, length($name));
+ return $y;
+ }
+ }
}
}
+ return $en;
}
1