From db991a5689cb962f1167e6125230ea344dbebd72 Mon Sep 17 00:00:00 2001
From: b
Date: Thu, 10 Aug 2023 17:33:24 +0000
Subject: [PATCH] save & relink [img] ot mustardtime otherthread

---
 bot2m.1.pl           | 249 +++++++++++++++++++++++++++++++++++++++++++
 makefile             |  12 ++-
 makefile.1.mak       |  12 ++-
 ottmirror.1.cron     |  12 ++-
 settings-release.txt |   8 +-
 settings.txt         |  31 +++++-
 6 files changed, 308 insertions(+), 16 deletions(-)
 create mode 100644 bot2m.1.pl

diff --git a/bot2m.1.pl b/bot2m.1.pl
new file mode 100644
index 0000000..573d6a7
--- /dev/null
+++ b/bot2m.1.pl
@@ -0,0 +1,249 @@
+# Copyright (C) 2023 Balthasar Szczepański
+# bot2m.pl automatically generated from bot2m.1.pl
+
+# This file is part of OTT mirror.
+#
+# OTT mirror is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# OTT mirror is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with OTT mirror. If not, see <http://www.gnu.org/licenses/>.
+
+# Saves the images used in stored mustard posts and relinks them.
+#
+# Every numerically named post file in MPOST_PATH that has BBCode enabled
+# is scanned for [img]URL[/img] tags. Each image outside OWN_DOMAIN is
+# downloaded (directly or, failing that, through WAYBACK), stored below
+# WWW_IMG_PATH, and its tag is rewritten to point below IMG_URL. A post
+# whose text changed is submitted as an edit through SENDPOST.
+
+use strict;
+use warnings;
+use utf8;
+use Encode::Locale ('decode_argv');
+use Encode ('encode', 'decode');
+
+###PERL_LIB: use lib '/botm/lib/post';
+use botm_common (
+    'read_data_file', 'write_data_file',
+    'join_path', 'dir_path', 'make_temp_path',
+    'html_entity_decode',
+    'url_decode',
+    'merge_url',
+    'system_encoded'
+);
+use post_common (
+    'wget'
+);
+
+###PERL_MPOST_PATH: use constant MPOST_PATH => '/botm/data/ottmirror/mpost'
+###PERL_OWN_DOMAIN: use constant OWN_DOMAIN => '1190.bicyclesonthemoon.info'
+###PERL_WWW_IMG_PATH: use constant WWW_IMG_PATH => '/botm/www/1190/ott/image'
+###PERL_IMG_URL: use constant IMG_URL => 'http://1190.bicyclesonthemoon.info/ott/image'
+###PERL_TMP_PATH: use constant TMP_PATH => '/botm/tmp/post';
+###PERL_WAYBACK: use constant WAYBACK => 'http://web.archive.org/web/';
+###PERL_BIN_IMG_PATH: use constant BIN_IMG_PATH => '/botm/bin/ottmirror/image';
+###PERL_MKDIR: use constant MKDIR => 'mkdir';
+###PERL_MV: use constant MV => 'mv';
+###PERL_SENDPOST: use constant SENDPOST => '/botm/bin/post/sendpost';
+
+# File name extension for each exit status reported by BIN_IMG_PATH.
+my %IMG_EXT = (
+    1 => '.gif',
+    2 => '.png',
+    3 => '.jpg',
+    4 => '.bmp',
+);
+
+
+binmode STDIN, ':encoding(console_in)';
+binmode STDOUT, ':encoding(console_out)';
+binmode STDERR, ':encoding(console_out)';
+decode_argv();
+
+my $dh;
+if (opendir $dh, encode('locale_fs', MPOST_PATH)) {
+    # The names are not decoded from the file system encoding; only purely
+    # numeric (ASCII) names are used below, so that makes no difference.
+    my @fl = readdir $dh;
+    closedir $dh;
+
+    foreach my $fn (@fl) {
+        if ($fn =~ /\A[0-9]+\z/) {
+            process_post($fn);
+        }
+    }
+}
+else {
+    print STDERR "Cannot open ".MPOST_PATH.": $!\n";
+}
+
+# Relinks the images of one stored post and, if any link changed, submits
+# the edited post.
+#   $fn - name of the post file in MPOST_PATH; also used as the post id
+#         in the --edit option of SENDPOST
+sub process_post {
+    (my $fn) = @_;
+
+    my %post = read_data_file(join_path('/',MPOST_PATH,$fn), 'utf8');
+
+    # [img] tags are not interpreted when BBCode is disabled.
+    if ($post{'disable_bbcode'}) {
+        return;
+    }
+
+    # Recover the BBCode source: line breaks become newlines, then the
+    # HTML entities are decoded. The pattern must not be left empty, Perl
+    # would then reuse the last successfully matched pattern.
+    # NOTE(review): assumes line breaks are stored as <br> - confirm.
+    my $message = $post{'bb'} // '';
+    $message =~ s/<br\s*\/?>/\n/gis;
+    $message = html_entity_decode($message);
+
+    # split() with a capture group yields plain text at the even indexes
+    # and the URL of each [img] tag at the odd ones.
+    my $changed = 0;
+    my @part = split(/\[img\]([^\[\]]*)\[\/img\]/, $message, -1);
+    for (my $i = 1; $i < @part; $i += 2) {
+        my $img_url = $part[$i];
+        my $new_img_url = save_img($img_url);
+        if ($new_img_url ne $img_url) {
+            $changed = 1;
+            print "$fn: $img_url -> $new_img_url\n";
+        }
+        $part[$i] = '[img]'.$new_img_url.'[/img]';
+    }
+    if (!$changed) {
+        return;
+    }
+
+    my %new_post = ();
+    $new_post{'content'} = join('', @part);
+    $new_post{'username'} = url_decode($post{'username'});
+    $new_post{'password'} = url_decode($post{'password'});
+    $new_post{'subject'} = url_decode($post{'subject'});
+    # Posts with BBCode disabled were skipped above.
+    $new_post{'bbcode'} = '1';
+    $new_post{'smilies'} = $post{'disable_smilies'} ? '0' : '1';
+    $new_post{'urls'} = $post{'disable_magic_url'} ? '0' : '1';
+    $new_post{'signature'} = $post{'attach_sig'} ? '1' : '0';
+    $new_post{'notify'} = $post{'notify'} ? '1' : '0';
+
+    my $tmp_file = make_temp_path(TMP_PATH, 'bot2m.mpost.txt');
+    write_data_file($tmp_file, 'utf8', 0, \%new_post);
+    my $r = system_encoded(SENDPOST, (SENDPOST, '--mirror', '--edit=m'.$fn, $tmp_file));
+    # The file contains the password; remove it whatever the result.
+    unlink $tmp_file;
+    if ($r != 0) {
+        print "Failed to send edited post $fn\n";
+    }
+}
+
+# Stores a local copy of an image and returns the URL to use for it.
+#   $url - image address taken from an [img] tag
+# Returns the address of the local copy below IMG_URL, or $url unchanged
+# when it is not an absolute http(s) address, already points at
+# OWN_DOMAIN, or could not be downloaded, recognised or stored.
+sub save_img {
+    (my $url) = @_;
+    if ($url !~ /^https?:\/\//) {
+        return $url;
+    }
+    # Match the domain literally (its dots are not wildcards) and require
+    # the host name to end there, so look-alike hosts are not skipped.
+    my $own_domain = quotemeta(OWN_DOMAIN);
+    if ($url =~ m{^https?://$own_domain(?:[:/?#]|\z)}i) {
+        return $url;
+    }
+
+    my $escaped = img_save_url($url);
+    my $path = join_path('/',WWW_IMG_PATH, $escaped);
+    my $new_url = merge_url(IMG_URL, $escaped);
+
+    # The extension of a saved copy depends on its format: accept any.
+    my $findpath = encode('locale_fs', $path.'.*');
+    if (my @gfp = glob($findpath)) {
+        print "Already saved $url\n";
+        return $new_url;
+    }
+
+    print "not yet saved $findpath\n";
+    my $tmp_path = make_temp_path(TMP_PATH, 'bot2m.img.tmp');
+    my %wget_options = ('no-check-certificate' => 1);
+    print "GET $url\n";
+    my $r = wget($url, $tmp_path, \%wget_options);
+    if ($r != 0) {
+        # Second attempt: the copy archived at WAYBACK.
+        my $wayback_url = join_path('/',WAYBACK, $url);
+        print "GET $wayback_url\n";
+        $r = wget($wayback_url, $tmp_path, \%wget_options);
+        if ($r != 0) {
+            print "Failed to get $url\n";
+            unlink $tmp_path;
+            return $url;
+        }
+    }
+
+    # BIN_IMG_PATH reports the image format as its exit status.
+    $r = system_encoded(BIN_IMG_PATH, (BIN_IMG_PATH, $tmp_path)) >> 8;
+    my $ext = $IMG_EXT{$r};
+    if (!defined $ext) {
+        print "unknown format\n";
+        unlink $tmp_path;
+        return $url;
+    }
+    $path .= $ext;
+
+    (my $dir) = dir_path('/',$path);
+    if ($dir ne '') {
+        system_encoded(MKDIR, (MKDIR, '-p', $dir));
+    }
+    $r = system_encoded(MV, (MV, '-n', $tmp_path, $path));
+    if ($r != 0) {
+        print "Failed to mv $tmp_path $path\n";
+        unlink $tmp_path;
+        return $url;
+    }
+    print "SAVED $path\n";
+    # mv -n leaves the temporary file in place if the target exists.
+    unlink $tmp_path;
+    return $new_url;
+}
+
+# Converts an image URL into a relative path usable as a file name.
+#   $url - the URL to convert
+# ASCII letters and digits are kept. Any other character is replaced by
+# _XX (upper case hexadecimal) for every byte of its UTF-8 encoding.
+# A result longer than 240 characters is cut into directory levels of
+# 200 characters, presumably to stay within file name length limits.
+sub img_save_url {
+    (my $url) = @_;
+
+    my $escaped = '';
+    foreach my $ch (split(//, $url)) {
+        if ($ch =~ /[A-Za-z0-9]/) {
+            $escaped .= $ch;
+        }
+        else {
+            foreach my $byte (unpack('C*', encode('utf8', $ch))) {
+                $escaped .= sprintf('_%02X', $byte);
+            }
+        }
+    }
+
+    my $final = '';
+    while (length($escaped) > 240) {
+        $final = join_path('/',$final, substr($escaped, 0, 200));
+        $escaped = substr($escaped, 200);
+    }
+    $final = join_path('/',$final, $escaped);
+
+    return $final;
+}
diff --git a/makefile b/makefile
index 8ee9328..aaef9dc 100644
--- a/makefile
+++ b/makefile
@@ -87,7 +87,8 @@ posted.1.c\
 preview.1.awk\
 update.1.c\
 view.1.awk\
-view.1.c
+view.1.c\
+bot2m.1.pl
 TO_GENERATE=\
 bot2.awk\
 bot2.c\
@@ -106,7 +107,8 @@ posted.c\
 preview.awk\
 update.c\
 view.awk\
-view.c
+view.c\
+bot2m.pl
 
 C=\
 bot2\
@@ -158,7 +160,8 @@ update\
 upload.awk\
 verify.awk\
 view\
-view.awk
+view.awk\
+bot2m.pl
 
 SRC=\
 agpl.txt\
@@ -193,7 +196,8 @@ upload.awk\
 verify.awk\
 view.1.c\
 view.1.awk\
-www
+www\
+bot2m.1.pl
 
 DIR=\
 
diff --git a/makefile.1.mak b/makefile.1.mak
index 0d0d571..f1449fa 100644
--- a/makefile.1.mak
+++ b/makefile.1.mak
@@ -87,7 +87,8 @@ posted.1.c\
 preview.1.awk\
 update.1.c\
 view.1.awk\
-view.1.c
+view.1.c\
+bot2m.1.pl
 TO_GENERATE=\
 bot2.awk\
 bot2.c\
@@ -106,7 +107,8 @@ posted.c\
 preview.awk\
 update.c\
 view.awk\
-view.c
+view.c\
+bot2m.pl
 
 C=\
 bot2\
@@ -158,7 +160,8 @@ update\
 upload.awk\
 verify.awk\
 view\
-view.awk
+view.awk\
+bot2m.pl
 
 SRC=\
 agpl.txt\
@@ -193,7 +196,8 @@ upload.awk\
 verify.awk\
 view.1.c\
 view.1.awk\
-www
+www\
+bot2m.1.pl
 
 DIR=\
 
diff --git a/ottmirror.1.cron b/ottmirror.1.cron
index 3f96c51..edc1b70 100644
--- a/ottmirror.1.cron
+++ b/ottmirror.1.cron
@@ -15,9 +15,13 @@
 #mustard post
 ###cron_blitz_schedule; ###cron_user; ###bin_bot3_path;
 
+#mustard reketchup
+###cron_mustard_reketchup_schedule; ###cron_user; ###perl; ###bin_bot2m_path; > ###log_bot2m_path;
+
 #log relocation
 #TODO: Replace with better mechanism!
-###cron_log_relocation_schedule; ###cron_user; ###mv; ###log_bot2_path;1 ###log_bot2_path;1.lastweek -###cron_log_relocation_schedule; ###cron_user; ###mv; ###log_bot2_path;2 ###log_bot2_path;2.lastweek -###cron_log_relocation_schedule; ###cron_user; ###mv; ###log_bot2_path;3 ###log_bot2_path;3.lastweek -###cron_log_relocation_schedule; ###cron_user; ###mv; ###log_bot3_path; ###log_bot3_path;.lastweek +# ###cron_log_relocation_schedule; ###cron_user; ###mv; ###log_bot2_path;1 ###log_bot2_path;1.lastweek +# ###cron_log_relocation_schedule; ###cron_user; ###mv; ###log_bot2_path;2 ###log_bot2_path;2.lastweek +# ###cron_log_relocation_schedule; ###cron_user; ###mv; ###log_bot2_path;3 ###log_bot2_path;3.lastweek +# ###cron_log_relocation_schedule; ###cron_user; ###mv; ###log_bot3_path; ###log_bot3_path;.lastweek +###cron_log_relocation_schedule; ###cron_user; ###mv; ###log_bot2m_path; ###log_bot2m_path;.lastweek diff --git a/settings-release.txt b/settings-release.txt index 977583c..cdfec61 100644 --- a/settings-release.txt +++ b/settings-release.txt @@ -14,6 +14,8 @@ configure: /botm/bin/config/configure.pl useragent2: bothasar_t (http://1190.bicyclesonthemoon.info/ott/; Time thread mirror bot) useragent3: bothasar_p (http://1190.bicyclesonthemoon.info/ott/; Time thread post bot) +postlib_path: /botm/lib/test-post + bin_path: /botm/bin/ottmirror conf_path: /botm/etc/www/conf/1190 cron_path: /etc/cron.d @@ -45,6 +47,8 @@ bot3name: bothasar_p bot3_url: http://1190.bicyclesonthemoon.info/bothasar_p/ default_subject: Re: 1190: "Time" +sendpost: /botm/bin/post/sendpost + awk: /usr/bin/mawk cat: /usr/bin/cat chmod: /usr/bin/chmod @@ -80,4 +84,6 @@ cron_present_reketchup_options: -r -o5 -m10 -p2 -w9 -v5 -d -a -b cron_mustard_post_schedule: #26 23 * * * -cron_log_relocation_schedule: #0 0 * * 1 \ No newline at end of file +cron_mustard_reketchup_schedule: 32 4 * * * + +cron_log_relocation_schedule: 0 0 * * 1 \ No newline at end of file diff --git a/settings.txt b/settings.txt 
index f92c99b..4cce191 100644 --- a/settings.txt +++ b/settings.txt @@ -17,9 +17,16 @@ # You should have received a copy of the GNU Affreo General Public License # along with OTT mirror. If not, see . + +_PERL_STR: '@_ESCAPE($0,')' +_PERL_USE_2: use $0 $1; +_PERL_CONSTANT: use constant $0 => $1; +_PERL_CONSTANT_STR: @_PERL_CONSTANT($0,@_PERL_STR($1)) + _AWK_DEFINE_STR: $0="@_ESCAPE($1)" _C_DEFINE_STR: #define $0 "@_ESCAPE($1)" + _bot3name_encoded = @_URL_ENCODE($bot3name) _own_domain_encoded = @_URL_ENCODE($own_domain) _own_url_encoded = @_URL_ENCODE($own_url) @@ -40,6 +47,7 @@ _bin_awk_upload_path = @_PATH($bin_path, upload.awk) _bin_awk_verify_path = @_PATH($bin_path, verify.awk) _bin_awk_view_path = @_PATH($bin_path, view.awk) bin_bot2_path = @_PATH($bin_path, bot2) +bin_bot2m_path = @_PATH($bin_path, bot2m.pl) bin_bot3_path = @_PATH($bin_path, bot3) _bin_findpost_path = @_PATH($bin_path, findpost) _bin_image_path = @_PATH($bin_path, image) @@ -52,11 +60,14 @@ _bin_redirect_path = @_PATH($bin_path, redirect) _bin_update_path = @_PATH($bin_path, update) _bin_view_path = @_PATH($bin_path, view) +_url_img = @_PATH($own_url, image) + _conf_path = @_PATH($conf_path, $name\.conf) _cron_path = @_PATH($cron_path, $name) -log_bot2_path = @_PATH($log_path, bot2.log_) -log_bot3_path = @_PATH($log_path, bot3.log) +log_bot2_path = @_PATH($log_path, bot2.log_) +log_bot2m_path = @_PATH($log_path, bot2m.log) +log_bot3_path = @_PATH($log_path, bot3.log) _mem_last_path = @_PATH($mem_path, lasttime.) 
_mem_lasttm_path_3 = @_PATH($mem_path, bot3last) @@ -292,4 +303,18 @@ C_AWK_VERIFY = @_C_DEFINE_STR(AWK_VERIFY, $_bin_awk_verify_path) C_AWK_VIEW = @_C_DEFINE_STR(AWK_VIEW, $_bin_awk_view_path) C_BOT_PATH_2 = @_C_DEFINE_STR(BOT_PATH, $bin_bot2_path) -C_ = @_C_DEFINE_STR(uvw, $xyz) +#C_ = @_C_DEFINE_STR(uvw, $xyz) + + +PERL_LIB = @_PERL_USE_2(lib, @_PERL_STR($postlib_path)) + +PERL_MPOST_PATH = @_PERL_CONSTANT_STR( MPOST_PATH , $_mem_post_path) +PERL_OWN_DOMAIN = @_PERL_CONSTANT_STR( OWN_DOMAIN , $own_domain) +PERL_WWW_IMG_PATH = @_PERL_CONSTANT_STR( WWW_IMG_PATH, $_www_img_path) +PERL_IMG_URL = @_PERL_CONSTANT_STR( IMG_URL , $_url_img) +PERL_TMP_PATH = @_PERL_CONSTANT_STR( TMP_PATH , $tmp_path) +PERL_WAYBACK = @_PERL_CONSTANT_STR( WAYBACK , $wayback_addr) +PERL_BIN_IMG_PATH = @_PERL_CONSTANT_STR( BIN_IMG_PATH, $_bin_image_path) +PERL_MKDIR = @_PERL_CONSTANT_STR( MKDIR , $mkdir) +PERL_MV = @_PERL_CONSTANT_STR( MV , $mv) +PERL_SENDPOST = @_PERL_CONSTANT_STR( SENDPOST , $sendpost) -- 2.30.2