From: b Date: Sat, 10 Jun 2023 08:01:25 +0000 (+0000) Subject: merge URL, bugfix X-Git-Tag: v1.0.9 X-Git-Url: http://bicyclesonthemoon.info/git-projects/?a=commitdiff_plain;h=refs%2Ftags%2Fv1.0.9;p=botm%2Fcommon-perl merge URL, bugfix --- diff --git a/botm_common.pm b/botm_common.pm index 3c7a4ad..f2d48ca 100644 --- a/botm_common.pm +++ b/botm_common.pm @@ -32,7 +32,7 @@ $VERSION = 1.000009; 'read_data_file', 'write_data_file', 'url_encode', 'url_decode', 'url_query_encode', 'url_query_decode', - 'split_url', + 'split_url', 'join_url', 'merge_url', 'html_entity_encode_dec', 'html_entity_encode_hex', 'html_entity_encode_name', 'html_entity_decode', 'join_path', ); @@ -374,10 +374,14 @@ sub url_decode_xch { # - query # - fragment -# split_url splits an URL (string) into its components and returns +# split_url() splits an URL (string) into its components and returns # them as a hash. # $url is the URL to decompose -# if $relative is true then +# if $relative is true and scheme is not present in the URL +# then the URL is assumed to start from the (relative in this case) +# path. +# URL is assumed without checking to be correctly formatted. +# the individual parts are not further decoded sub split_url { (my $url, my $relative) = @_; my %data; @@ -473,6 +477,9 @@ sub split_url { } +# join_url() builds an URL from its components and returns it as a string. +# $data is a reference to the hash containing URL components. +# They are assumed without checking to already be correctly encoded. sub join_url { (my $data) = @_; @@ -481,7 +488,7 @@ sub join_url { if ($data->{'host'} ne '') { $url = $data->{'host'}; if ($data->{'port'} ne '') { - $url .= ':' $data->{'port'}; + $url .= ':' . $data->{'port'}; } if ($data->{'userinfo'} ne '') { $url = $data->{'userinfo'} . '@' . $url; @@ -513,6 +520,68 @@ sub join_url { return $url; } +# merge_url() will create a new URL based on +# $base_url and $target_url. +# both can be a string or hash reference, +# $target_url if without scheme, host, authority will be treated as +# a relative link and applied to $base_url +# otherwise $target_url is absolute. +# ".." in the path is not resolved so "/aaa/bbb" + "../ccc" will be +# "/aaa/bbb/../ccc" and not "/aaa/ccc". TODO: improve? +sub merge_url { + (my $base_url, my $target_url) = @_; + my %base_data; + my %target_data; + my %final_data; + my $final_url; + if (ref($base_url)) { + %base_data = %$base_url; + } + else { + %base_data = split_url($base_url); + } + if (ref($target_url)) { + %target_data = %$target_url; + } + else { + %target_data = split_url($target_url, 1); + } + + if ( + ($target_data{'scheme'} ne '') or + ($target_data{'authority'} ne '') or + ($target_data{'host'} ne '') + ) { + return join_url(\%target_data); + } + + $final_data{'scheme'} = $base_data{'scheme'}; + $final_data{'authority'} = $base_data{'authority'}; + $final_data{'host'} = $base_data{'host'}; + $final_data{'port'} = $base_data{'port'}; + $final_data{'userinfo'} = $base_data{'userinfo'}; + $final_data{'path'} = $base_data{'path'}; + $final_data{'query'} = $base_data{'query'}; + $final_data{'fragment'} = $base_data{'fragment'}; + + if ($target_data{'path'} ne '') { + $final_data{'path'} = join_path('/', $base_data{'path'}, $target_data{'path'}); + $final_data{'query'} = ''; + $final_data{'fragment'} = ''; + } + + if ($target_data{'query'} ne '') { + $final_data{'query'} = $target_data{'query'}; + $final_data{'fragment'} = ''; + } + + if ($target_data{'fragment'} ne '') { + $final_data{'fragment'} = $target_data{'fragment'}; + } + + return join_url(\%final_data); +} + # URL query string sub url_query_encode {