From 5422ffd949793eafcf85c2cd19194c00aa3eca04 Mon Sep 17 00:00:00 2001 From: b Date: Sat, 8 Jul 2023 16:34:47 +0000 Subject: [PATCH] Posting to mirror now works OK --- botm-common | 2 +- sendpost.1.pl | 295 +++++++++++++++++++++++++++++++++++++------ settings-debug.txt | 3 + settings-release.txt | 7 +- settings.txt | 2 + 5 files changed, 270 insertions(+), 39 deletions(-) diff --git a/botm-common b/botm-common index e44d4fc..576e2fd 160000 --- a/botm-common +++ b/botm-common @@ -1 +1 @@ -Subproject commit e44d4fc88013f9ad09186ce3cfc73d93829103d2 +Subproject commit 576e2fd6f22344e66fda21142b48ad6280e6e719 diff --git a/sendpost.1.pl b/sendpost.1.pl index 4ae5fd9..d83b0a4 100644 --- a/sendpost.1.pl +++ b/sendpost.1.pl @@ -26,16 +26,21 @@ use botm_common ( 'read_data_file', 'write_data_file', 'write_postdata_file', 'read_header_file', 'merge_url', + 'html_entity_decode', 'url_query_encode', 'make_temp_path', 'system_encoded' ); -###PERL_ENCODING_FILE: use constant ENCODING_FILE => 'UTF-8'; +###PERL_ENCODING_FILE: use constant ENCODING_FILE => 'UTF-8'; +###PERL_ENCODING_OTT: use constant ENCODING_OTT => 'UTF-8'; +###PERL_ENCODING_MIRROR: use constant ENCODING_MIRROR => 'UTF-8'; ###PERL_DEFAULT_PASSWORD: use constant DEFAULT_PASSWORD => 'password'; ###PERL_DEFAULT_SUBJECT: use constant DEFAULT_SUBJECT => 'Re: 1190: "Time"'; ###PERL_DEFAULT_USERNAME: use constant DEFAULT_USERNAME => 'username'; +###PERL_WGET_RETRIES: use constant WGET_RETRIES => 3; +###PERL_WGET_TIMEOUT: use constant WGET_TIMEOUT => 60; ###PERL_WGET_USERAGENT: use constant WGET_USERAGENT => "post tool (http://bicyclesonthemoon.info/git-projects/?p=ott/post)"; ###PERL_WGET: use constant WGET => '/usr/bin/wget'; @@ -69,7 +74,9 @@ use constant HELP_TEXT => " -m, --no-mirror\n". " -R, --mirror-url=MIRROR_URL\n", "\n". - " -h, --help\n"; + " -h, --help\n". + " -q, --quiet\n". + " -v, --verbose\n"; binmode STDIN, ':encoding(console_in)'; binmode STDOUT, ':encoding(console_out)'; @@ -103,7 +110,9 @@ GetOptions ( 'no-mirror|m' => \$options{'no-mirror'}, 'mirror-url|R=s' => \$options{ 'mirror-url'}, - 'help|h' => \$options{ 'help'} + 'help|h' => \$options{'help'}, + 'quiet|q' => \$options{'quiet'}, + 'verbose|v' => \$options{'verbose'}, # TODO: useragent ); @@ -111,41 +120,95 @@ if ($options{'help'} ne '') { print HELP_TEXT; exit 0; } + +my $time = time(); +my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime($time); + +if ($options{'quiet'} ne '') { + $options{'verbose'} = ''; +} if ($options{'mirror-url'} eq '') { $options{'mirror-url'} = MIRROR_URL; } - -# print "OPTIONS:\n"; -# my $a = write_data_file(\*STDOUT, ENCODING_STD_OUT, 0, \%options); +unless ($options{'quiet'} ne '') { + print sprintf ( + "POST BOT $$ $time - %04d-%02d-%02d %02d:%02d:%02d\n", + $year+1900, $mon+1, $mday, $hour, $min, $sec + ); + if ($options{'verbose'} ne '') { + print "OPTIONS:\n"; + write_data_file(\*STDOUT, 'console_out', 0, \%options); + print "\n"; + } +} if ((scalar @ARGV) == 0) { @ARGV = (\*STDIN); } +my $code = 0; foreach my $arg (@ARGV) { - # print "\nFILE $arg\n"; - my %data = read_data_file($arg, ENCODING_FILE, $options{'data-only'}); - # foreach my $ind (keys %data) { - # print $ind.'='.$data{$ind}."\n"; - # } + my $r; + + unless ($options{'quiet'} ne '') { + if ($arg == \*STDIN) { + print "STDIN\n"; + } + else { + print "FILE $arg\n"; + } + } + + my %data = (); + unless (($arg == \*STDIN) and ($options{'content'} ne '') and ($options{'data-only'} ne '')) + { + %data = read_data_file($arg, ENCODING_FILE, $options{'data-only'}); + unless (keys %data) { + unless ($options{'quiet'} ne '') { + print "NO DATA\n"; + if ($code == 0) { + $code = 1; + } + } + print STDERR 'No data'; + unless ($arg == \*STDIN) { + print STDERR " in file $arg"; + } + print STDERR "\n"; + next; + } + } + if ($options{'verbose'} ne '') { + write_data_file(\*STDOUT, 'console_out', 0, \%data); + print "\n"; + } my %post = prepare_post(\%options, \%data); - # foreach my $ind (keys %post) { - # print $ind.'='.$post{$ind}."\n"; - # } + if ($options{'verbose'} ne '') { + print "POST\n"; + write_data_file(\*STDOUT, 'console_out', 0, \%post); + print "\n"; + } # unless ($options{'mirror'}) { - # if (post_to_ott (\%options, \%post)) { + # $r = post_to_ott (\%options, \%post)); + # if ($r == 0) { # next; # } # } unless ($options{'no-mirror'}) { - post_to_mirror(\%options, \%post); + $r = post_to_mirror(\%options, \%post); + if ($r == 0) { + next; + } + } + if ($code == 0) { + $code = $r; } } - +exit $code; @@ -270,32 +333,80 @@ sub prepare_post { sub post_to_mirror { (my $cmd_options, my $post) = @_; + my $post_url = merge_url($cmd_options->{'mirror-url'}, 'post'); + my $edit_url = $post_url; my $tmp_path = make_temp_path(TMP_PATH, 'sendpost.htm'); my $cookie_path = make_temp_path(TMP_PATH, 'sendpost.cookie.txt'); my $postdata_path = make_temp_path(TMP_PATH, 'sendpost.postdata.txt'); my %wget_options = ( - 'with_header', 1, - 'cookie_path', $cookie_path, + 'with_header'=> 1, + 'cookie_path'=> $cookie_path, + 'encoding' => ENCODING_MIRROR, + 'useragent' => WGET_USERAGENT, + 'verbose' => $cmd_options->{'verbose'} ); my %post_data = (); my %query_data = (); + my %header = (); + my $error = ''; + my $fh; + my $r; - if ($cmd_options{'edit'} ne '') { - $query_data{'e'} = $cmd_options{'edit'}; + unless ($cmd_options->{'quiet'} ne '') { + print "POST TO MIRROR\n"; + } + + if ($cmd_options->{'edit'} ne '') { + if ($cmd_options->{'edit'} =~ /^m([0-9]+)$/) { + $cmd_options->{'edit'} = $1; + } + $query_data{'e'} = 'm'.$cmd_options->{'edit'}; } if (keys %query_data) { - my $query = join_url(\%query_data); + my $query = url_query_encode(\%query_data, ENCODING_MIRROR()); my %query_url = ('query' => $query); - $post_url = merge_url($post_url, \%query_url); + $edit_url = merge_url($edit_url, \%query_url); + } + + unless ($cmd_options->{'quiet'} ne '') { + print "GET edit page"; + if ($cmd_options->{'verbose'} ne '') { + print ": $edit_url SAVE $tmp_path\n"; + write_data_file(\*STDOUT, 'console_out', 0, \%wget_options); + } + else { + print "\n"; + } + } + + $r = wget($edit_url, $tmp_path, \%wget_options,); + if ($r != 0) { + unless ($cmd_options->{'quiet'} ne '') { + print "WGET FAIL $r\n"; + } + print STDERR "Failed to get edit page: wget: $r\n"; + return $r; } - wget($post_url, $tmp_path, \%wget_options,); + %header = read_header_file($tmp_path, ENCODING_MIRROR()); + if ($cmd_options->{'verbose'} ne '') { + print "HEADER\n"; + write_data_file(\*STDOUT, 'console_out', 0, \%header); + } + + if ($header{':status-code'} != 200) { + unless ($cmd_options->{'quiet'} ne '') { + print 'FAIL '.$header{':status-code'}.' '.$header{':reason-phrase'}."\n"; + } + print STDERR 'Failed to get edit page: '.$header{':status-code'}.' '.$header{':reason-phrase'}."\n"; + return int($header{':status-code'}) + } - $wget_options{'referer'} = $post_url; - $wget_options{'postdata_path'} = postdata_path; + $wget_options{'referer'} = $edit_url; + $wget_options{'postdata_path'} = $postdata_path; $post_data{'subject'} = $post->{'subject'}; $post_data{'username'} = $post->{'username'}; @@ -317,25 +428,128 @@ sub post_to_mirror { if ($post->{'notify'}) { $post_data{'notify'} = 'on'; } + if ($cmd_options->{'edit'} ne '') { + $post_data{'forceID'} = $cmd_options->{'edit'}; + } $post_data{'post'} = 'Submit'; - wget($post_url, $tmp_path, \%wget_options, \%post_data); + unless ($cmd_options->{'quiet'} ne '') { + if ($cmd_options->{'verbose'} ne '') { + print "POST DATA\n"; + write_data_file(\*STDOUT, 'console_out', 0, \%post_data); + } + print "SUBMIT post"; + if ($cmd_options->{'verbose'} ne '') { + print ": $post_url SAVE $tmp_path\n"; + write_data_file(\*STDOUT, 'console_out', 0, \%wget_options); + } + else { + print "\n"; + } + } + + $r = wget($post_url, $tmp_path, \%wget_options, \%post_data); + if ($r != 0) { + unless ($cmd_options->{'quiet'} ne '') { + print "WGET FAIL $r\n"; + } + print STDERR "Failed to open post submit response: wget: $r\n"; + return $r; + } + + unless (open($fh, '<:encoding('.ENCODING_MIRROR.')', encode('locale_fs', $tmp_path))) { + unless ($cmd_options->{'quiet'} ne '') { + print "FAIL open $tmp_path\n"; + } + print STDERR "Failed to open post submit response. $tmp_path\n"; + return 1; + } + + %header = read_header_file($fh, ENCODING_MIRROR()); + if ($cmd_options->{'verbose'} ne '') { + print "HEADER\n"; + write_data_file(\*STDOUT, 'console_out', 0, \%header); + } + + if ($header{':status-code'} != 200) { + unless ($cmd_options->{'quiet'} ne '') { + print 'FAIL '.$header{':status-code'}.' '.$header{':reason-phrase'}."\n"; + } + print STDERR 'Failed to get post submit response page: '.$header{':status-code'}.' '.$header{':reason-phrase'}."\n"; + return int($header{':status-code'}) + } + + $error = find_mirror_post_error($fh); + close ($fh); + + if ($error ne '') { + unless ($cmd_options->{'quiet'} ne '') { + print "FAIL $error\n"; + } + print STDERR "Post not accepted by mirror: $error.\n"; + return 1; + } + + unless ($cmd_options->{'quiet'} ne '') { + print "POSTED TO MIRROR.\n\n"; + } - # unlink($tmp_path, $cookie_path, $postdata_path); + # in case of earlier failure, the temp files will remain, + # for investigation. + # a bot should remove them after some time. + unlink($tmp_path, $cookie_path, $postdata_path); + + return 0; } +sub find_mirror_post_error { + (my $file) = @_; + my $fh; + my $error = ''; + + if(ref($file)) { + $fh=$file; + } + else { + unless (open ($fh, '<:encoding('.ENCODING_MIRROR.')', encode('locale_fs', $file))) { + return "Failed to open $file"; + } + } + + while (defined(my $line = <$fh>)) { + # TODO: better catch mechanism! + if ($line =~ /

(.*)<\/p>/) { + if ($1 ne '') { + $error = html_entity_decode($1); + } + else { + $error = '""'; + } + last; + } + } + + unless (ref($file)) { + close ($fh); + } + + return $error; +} + sub wget { (my $url, my $path, my $options, my $postdata) = @_; my @arg = ( WGET, - '-q', '-t', WGET_RETRIES, - '--connect-timeout=', WGET_TIMEOUT, - '-U', WGET_USERAGENT + '--connect-timeout='.WGET_TIMEOUT, ); + unless ($options->{'verbose'} ne '') { + push(@arg, '-q'); + } + if ($options->{'with_header'}) { push(@arg, '--save-headers'); } @@ -350,15 +564,19 @@ sub wget { push(@arg, '--no-cookies'); } + if ($options->{'useragent'} ne '') { + push(@arg, '-U', $options->{'useragent'}); + } + if ($options->{'referer'} ne '') { - push(@arg, '--referer=', $options->{'referer'}); + push(@arg, '--referer='.$options->{'referer'}); } if ($options->{'postdata_path'} ne '') { if (defined $postdata) { write_postdata_file( $options->{'postdata_path'}, - ENCODING_FILE, '', + ENCODING_FILE, $options->{'encoding'}, $postdata ); push(@arg, '--post-file='.$options->{'postdata_path'}); @@ -370,11 +588,16 @@ sub wget { '-O', $path )); + if ($options->{'verbose'} ne '') { + print WGET."\n"; + foreach my $a (@arg) { + print "$a\n"; + } + } - system_encoded(WGET, @arg); - + my $r = system_encoded(WGET, @arg); - return 1; + return $r>>8; } diff --git a/settings-debug.txt b/settings-debug.txt index c10ec4c..0f754a9 100644 --- a/settings-debug.txt +++ b/settings-debug.txt @@ -14,6 +14,8 @@ tmp_path: /botm/tmp/test-post mirror_url: http://1190.botcastle1b/ott encoding_file: UTF-8 +encoding_ott: UTF-8 +encoding_mirror: UTF-8 default_subject: Re: 1190: "Time" default_username: _ @@ -23,6 +25,7 @@ wget_useragent: post tool (http://bicyclesonthemoon.info/git-projects/?p=ott/pos wget_retries: 3 wget_timeout: 60 + cp: /usr/bin/cp chmod: /usr/bin/chmod mkdir: /usr/bin/mkdir diff --git a/settings-release.txt b/settings-release.txt index 97a6a39..01c2ca7 100644 --- a/settings-release.txt +++ b/settings-release.txt @@ -11,12 +11,15 @@ bin_path: /botm/bin/post lib_path: /botm/lib/post tmp_path: /botm/tmp/post -mirror_url: https://1190.bicyclesonthemoon.info/ott +# TODO: wget doesn't accept my https :( +mirror_url: http://1190.bicyclesonthemoon.info/ott encoding_file: UTF-8 +encoding_ott: UTF-8 +encoding_mirror: UTF-8 default_subject: Re: 1190: "Time" -default_username: _ +default_username: default_username default_password: _ wget_useragent: post tool (http://bicyclesonthemoon.info/git-projects/?p=ott/post) diff --git a/settings.txt b/settings.txt index a82e4cd..ad4d366 100644 --- a/settings.txt +++ b/settings.txt @@ -43,6 +43,8 @@ PERL_DEFAULT_PASSWORD = @_PERL_CONSTANT_STR( DEFAULT_PASSWORD , $default_passwo PERL_DEFAULT_SUBJECT = @_PERL_CONSTANT_STR( DEFAULT_SUBJECT , $default_subject) PERL_DEFAULT_USERNAME = @_PERL_CONSTANT_STR( DEFAULT_USERNAME , $default_username) PERL_ENCODING_FILE = @_PERL_CONSTANT_STR( ENCODING_FILE , $encoding_file) +PERL_ENCODING_MIRROR = @_PERL_CONSTANT_STR( ENCODING_MIRROR , $encoding_mirror) +PERL_ENCODING_OTT = @_PERL_CONSTANT_STR( ENCODING_OTT , $encoding_ott) PERL_MIRROR_URL = @_PERL_CONSTANT_STR( MIRROR_URL , $mirror_url) PERL_TMP_PATH = @_PERL_CONSTANT_STR( TMP_PATH , $tmp_path) -- 2.30.2