'read_data_file', 'write_data_file',
'url_encode', 'url_decode',
'url_query_encode', 'url_query_decode',
- 'split_url',
+ 'split_url', 'join_url', 'merge_url',
'html_entity_encode_dec', 'html_entity_encode_hex', 'html_entity_encode_name', 'html_entity_decode',
'join_path',
);
# - query
# - fragment
-# split_url splits an URL (string) into its components and returns
+# split_url() splits an URL (string) into its components and returns
# them as a hash.
# $url is the URL to decompose
-# if $relative is true then
+# if $relative is true and scheme is not present in the URL
+# then the URL is assumed to start from the (relative in this case)
+# path.
+# URL is assumed without checking to be correctly formatted.
+# the individual parts are not further decoded
sub split_url {
(my $url, my $relative) = @_;
my %data;
}
+# join_url() builds an URL from its components and returns it as a string.
+# $data is a reference to the hash containing URL components.
+# They are assumed without checking to already be correctly encoded.
sub join_url {
(my $data) = @_;
if ($data->{'host'} ne '') {
$url = $data->{'host'};
if ($data->{'port'} ne '') {
- $url .= ':' $data->{'port'};
+ $url .= ':' . $data->{'port'};
}
if ($data->{'userinfo'} ne '') {
$url = $data->{'userinfo'} . '@' . $url;
return $url;
}
+# merge_url() will create a new URL based on
+# $base_url and $target_url.
+# both can be a string or hash reference,
+# $target_url if without scheme, host, authority will be treated as
+# a relative link and applied to $base_url
+# otherwise $target_url is absolute.
+# ".." in the path is not resolved so "/aaa/bbb" + "../ccc" will be
+# "/aaa/bbb/../ccc" and not "/aaa/ccc". TODO: improve?
+sub merge_url {
+ (my $base_url, my $target_url) = @_;
+ my %base_data;
+ my %target_data;
+ my %final_data;
+ my $final_url;
+ if (ref($base_url)) {
+ %base_data = %$base_url;
+ }
+ else {
+ %base_data = split_url($base_url);
+ }
+ if (ref($target_url)) {
+ %target_data = %$target_url;
+ }
+ else {
+ %target_data = split_url($target_url, 1);
+ }
+
+ if (
+ ($target_data{'scheme'} ne '') or
+ ($target_data{'authority'} ne '') or
+ ($target_data{'host'} ne '')
+ ) {
+ return join_url(\%target_data);
+ }
+
+ $final_data{'scheme'} = $base_data{'scheme'};
+ $final_data{'authority'} = $base_data{'authority'};
+ $final_data{'host'} = $base_data{'host'};
+ $final_data{'port'} = $base_data{'port'};
+ $final_data{'userinfo'} = $base_data{'userinfo'};
+ $final_data{'path'} = $base_data{'path'};
+ $final_data{'query'} = $base_data{'query'};
+ $final_data{'fragment'} = $base_data{'fragment'};
+
+ if ($target_data{'path'} ne '') {
+ $final_data{'path'} = join_path('/', $base_data{'path'}, $target_data{'path'});
+ $final_data{'query'} = '';
+ $final_data{'fragment'} = '';
+ }
+
+ if ($target_data{'query'} ne '') {
+ $final_data{'query'} = $target_data{'query'};
+ $final_data{'fragment'} = '';
+ }
+
+ if ($target_data{'fragment'} ne '') {
+ $final_data{'fragment'} = $target_data{'fragment'};
+ }
+
+ return join_url(\%final_data);
+}
+
# URL query string
sub url_query_encode {