From 7bb2d36c7d5bdd5f00423ad1927cfefc9e8f771d Mon Sep 17 00:00:00 2001 From: b Date: Mon, 7 Dec 2015 20:37:14 +0000 Subject: [PATCH] file processing, continued. saving. git-svn-id: svn://botcastle1b/yplom/facebug1@4 7dec801f-c475-4e67-ba99-809552d69c55 --- bot.1.pl | 158 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 127 insertions(+), 31 deletions(-) diff --git a/bot.1.pl b/bot.1.pl index 3d00706..7cc0729 100644 --- a/bot.1.pl +++ b/bot.1.pl @@ -1,7 +1,7 @@ ###PERL; use strict; - +use Fcntl; ###PROXY_LIB; use proxy_lib qw(url2path path2urldiv getcgi divideurl readconfigfile entitydecode urldecode); use POSIX qw(strftime); @@ -10,6 +10,7 @@ use POSIX qw(strftime); ###GROUPSETTINGS_PATH; my $time = time(); +srand ($time-$$); print strftime("%d.%m.%Y %H:%M:%S", gmtime($time))."\n"; if (scalar @ARGV) { @@ -111,6 +112,7 @@ sub processfile { my $timenumber; my %thread; + my %thread2; if ($headerpath =~ /^((.+)\@h)$/) { $headerpath = $1; @@ -189,6 +191,7 @@ sub processfile { my $ignoretext; my $link; my $hidename; + my $attnumber; local $/ = '<'; unless (defined ($text = <$contentfile>)) { @@ -217,7 +220,7 @@ sub processfile { if ($tag{'<'} eq 'h3') { $mode = 'thread-author'; } - elsif (($tag{'<'} eq 'div') and ($tag{'class'} =~ /^(bj|bk)$/)) { + elsif (($tag{'<'} eq 'div') and ($tag{'class'} =~ /^(bj|bk|bm)$/)) { # These are very helpful names, facebug, thank you! $mode='thread-content'; $level=0; $ignoretext=1; @@ -226,18 +229,54 @@ sub processfile { elsif ($tag{'<'} eq 'abbr') { $mode = 'thread-time'; } + elsif (($tag{'<'} eq 'div') and ($tag{'class'} =~ /^(bn|bl)$/)) { + $mode='thread-attachment'; + $level=0; + $attnumber=0; + } elsif (($tag{'<'} eq 'div') and ($tag{'id'} =~ /^ufi_/)){ $mode='posts'; - #!!! { + my $threadfile; + my $threadpath = ARCH_PATH.$$settings{'id'}.'/'.'thread/'.$thread{'id'}; - foreach my $ind (keys %thread) { - print "$ind: $thread{$ind}\n"; + # if (open ($threadfile, "+<", $threadpath)) { + if (sysopen ($threadfile, $threadpath, O_RDWR | O_CREAT)) { + # In the future it might be necessary to use a different function. + local $/ = "\n"; + %thread2 = readheaderfile($threadfile); + + if (($thread2{'timenumber'} ne '')and($thread2{'timenumber'}>$thread{'timenumber'})) { + print ("Newer version already saved.\n\n"); + } + else { + foreach my $ind (keys %thread) { + $thread2{$ind}=$thread{$ind}; + } + if ($thread2{'key'} eq '') { + $thread2{'key'} = key(256); + } + + if (seek($threadfile, 0, 0)) { + foreach my $ind (keys %thread2) { + print $threadfile "$ind: $thread2{$ind}\n"; + print "$ind: $thread2{$ind}\n"; #### + } + print $threadfile "\n"; + truncate ($threadfile , tell($threadfile)); + print "saved.\n\n"; + } + else { + print "Failed seek $threadfile.\n\n"; + } + } + close ($threadfile); + } + else + { + print "Can't open $threadpath.\n\n"; } - print "\n"; - - #!!! } } } elsif ($mode eq 'thread-author') { @@ -295,9 +334,9 @@ sub processfile { $hidename=0; } } - # else { - # $thread{'postcontent'}.='<<<'.$tag{'<'}.'>>>'; - # } + else { + # $thread{'postcontent'}.=''; + } } } @@ -307,6 +346,36 @@ sub processfile { } } + elsif ($mode eq 'thread-attachment') { + ############################################################### + if ($tag{'<'} eq 'div') { + ++$level; + } + elsif ($tag{'<'} eq '/div') { + if($level){ + --$level; + } + else { + $mode = 'thread'; + } + } + elsif (($tag{'<'} eq 'a') and ($tag{'href'} =~ /^\/photo\.php\?(.*&)?fbid=([0-9]+)(&.*)?$/)) { + ++$attnumber; + $thread{'img-'.$attnumber}=$2; + $mode = 'thread-attachment-img'; + } + } + + elsif ($mode eq 'thread-attachment-img') { + if ($tag{'<'} eq 'img') { + # SAVE THE IMG HERE + } + elsif ($tag{'<'} eq '/a') { + $mode = 'thread-attachment'; + } + } + + if ($tag{"\\"} ne '') { $closetag = 1; @@ -350,34 +419,45 @@ sub readheaderfile { my $headerfile; my %header; + if(ref($headerpath)) { + $headerfile=$headerpath; + } + else { + unless (open ($headerfile, "<", $headerpath)) { + return %header; + } + } + my $lastname=''; - if (open ($headerfile, "<", $headerpath)) { - while (defined(my $line = <$headerfile>)) { - $line =~ s/[\r\n]$//g; - my $headname=''; - my $headval=''; + while (defined(my $line = <$headerfile>)) { + $line =~ s/[\r\n]$//g; + my $headname=''; + my $headval=''; + + if($line =~ /^[ \t]+([^ \t].*)$/){ + if($lastname ne '') { + $header{$lastname}.=$1; + } + } + elsif ($line =~ /^([^:]*):[ \t]*([^ \t](.*[^ \t])?)[ \t]*$/) { + $headname = lc($1); + $headval = $2; - if($line =~ /^[ \t]+([^ \t].*)$/){ - if($lastname ne '') { - $header{$lastname}.=$1; - } + if ($header{$headname} ne '') { + $header{$headname}.=', '.$headval; } - elsif ($line =~ /^([^:]*):[ \t]*([^ \t](.*[^ \t])?)[ \t]*$/) { - $headname = lc($1); - $headval = $2; - - if ($header{$headname} ne '') { - $header{$headname}.=', '.$headval; - } - else { - $header{$headname}=$headval; - } - $lastname = $headname; + else { + $header{$headname}=$headval; } + $lastname = $headname; } + } + + unless (ref($headerpath)) { close ($headerfile); } + return %header; } @@ -497,4 +577,20 @@ sub taginfo { # print "\n"; # } return %tag; +} + +sub key { + (my $bits) = @_; + my $p = int($bits / 16); + my $q = $bits % 16; + my $keytext; + + if ($q){ + my $r = int(($q+3)/4); + $keytext=sprintf('%0'.$r.'x',int(rand(1<<$q))); + } + for(my $ind = 0; $ind < $p; ++$ind){ + $keytext.=sprintf('%04x',int(rand(65536))); + } + return $keytext; } \ No newline at end of file -- 2.30.2