From e1be78e37af4babc91749361a9b726386ff8ef14 Mon Sep 17 00:00:00 2001 From: b Date: Thu, 8 Jun 2023 14:05:36 +0000 Subject: [PATCH] Split URL into parts --- botm_common.pm | 100 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/botm_common.pm b/botm_common.pm index 8d607b0..5dec1da 100644 --- a/botm_common.pm +++ b/botm_common.pm @@ -25,13 +25,14 @@ use Exporter; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); # vX.Y.Z: X YYYZZZ -$VERSION = 1.000007; +$VERSION = 1.000008; @ISA = qw(Exporter); @EXPORT = (); @EXPORT_OK = ( 'read_data_file', 'write_data_file', 'url_encode', 'url_decode', - 'url_query_encode', 'url_query_decode',= + 'url_query_encode', 'url_query_decode', + 'split_url', 'html_entity_encode_dec', 'html_entity_encode_hex', 'html_entity_encode_name', 'html_entity_decode', 'join_path', ); @@ -404,6 +405,101 @@ sub url_query_decode { # } +sub split_url { + (my $url, my $relative) = @_; + my %data; + my $ind; + + # scheme + $ind = index($url, '://'); + if ($ind >= 0) { + $data{'scheme'} = substr($url, 0, $ind); + $url = substr($url, $ind+3); + } + else { + $data{'scheme'} = ''; + } + + # authority + if ($relative and ($data{'scheme'} eq '')) { + $data{'authority'} = ''; + } + else { + $data{'authority'} = $url; + if ($data{'authority'} =~ m/[\/\?#]/g) { + $ind = pos($data{'authority'})-1; + $url = substr($data{'authority'}, $ind); + $data{'authority'} = substr($data{'authority'}, 0, $ind); + } + else { + $url = ''; + } + } + + # userinfo, host, port + $ind = index($data{'authority'}, '@'); + if ($ind >= 0) { + $data{'userinfo'} = substr($data{'authority'}, 0, $ind); + $data{'host'} = substr($data{'authority'}, $ind+1); + } + else { + $data{'userinfo'} = ''; + $data{'host'} = $data{'authority'}; + } + $data{'host'} =~ m/\[[^\]]*\]/g; + if ($data{'host'} =~ m/:/g) { + $ind = pos($data{'host'})-1; + $data{'port'} = substr($data{'host'}, $ind+1); + $data{'host'} = substr($data{'host'}, 0, $ind); + } + else { + $data{'port'} = ''; + } + + # path + if (($url =~ /^\//) or $relative) { + $data{'path'} = $url; + if ($data{'path'} =~ m/[\?#]/g) { + $ind = pos($data{'path'})-1; + $url = substr($data{'path'}, $ind); + $data{'path'} = substr($data{'path'}, 0, $ind); + } + else { + $url = ''; + } + } + else { + $data{'path'} = ''; + } + + # query + if ($url =~ /^\?/) { + $data{'query'} = substr($url, 1); + $ind = index($data{'query'}, '#'); + if ($ind >= 0) { + $url = substr($data{'query'}, $ind); + $data{'query'} = substr($data{'query'}, 0 ,$ind); + } + else { + $url = ''; + } + } + else { + $data{'query'} = ''; + } + + # fragment + if ($url =~ /^#/) { + $data{'fragment'} = substr($url, 1); + } + else { + $data{'fragment'} = ''; + } + + return %data; + +} + # # https://datatracker.ietf.org/doc/html/rfc3986#section-3 # sub join_url { # ( -- 2.30.2