From: b Date: Tue, 8 Dec 2015 18:37:06 +0000 (+0000) Subject: Saving attached images, links in firstposts. X-Git-Url: http://bicyclesonthemoon.info/git-projects/?a=commitdiff_plain;h=c78e6fac54ae75bf79a8468614b02e04a1a58ee5;p=yplom%2Ffacebug1 Saving attached images, links in firstposts. git-svn-id: svn://botcastle1b/yplom/facebug1@5 7dec801f-c475-4e67-ba99-809552d69c55 --- diff --git a/bot.1.pl b/bot.1.pl index 7cc0729..eb9f75c 100644 --- a/bot.1.pl +++ b/bot.1.pl @@ -2,12 +2,15 @@ use strict; use Fcntl; +use File::Copy; ###PROXY_LIB; -use proxy_lib qw(url2path path2urldiv getcgi divideurl readconfigfile entitydecode urldecode); +use proxy_lib qw(url2path urldiv2path path2urldiv getcgi divideurl readconfigfile entitydecode urldecode); use POSIX qw(strftime); ###ARCH_PATH; ###GROUPSETTINGS_PATH; +###KEY_BITS; +###MAX_REDIRECTIONS; my $time = time(); srand ($time-$$); @@ -133,7 +136,7 @@ sub processfile { $id=''; } - while () { + for (my $ind=0; $ind$thread{'timenumber'})) { - print ("Newer version already saved.\n\n"); - } - else { - foreach my $ind (keys %thread) { - $thread2{$ind}=$thread{$ind}; - } - if ($thread2{'key'} eq '') { - $thread2{'key'} = key(256); - } + if (flock ($threadfile, 2)) { + # In the future it might be necessary to use a different function. + %thread2 = readheaderfile($threadfile); - if (seek($threadfile, 0, 0)) { - foreach my $ind (keys %thread2) { - print $threadfile "$ind: $thread2{$ind}\n"; - print "$ind: $thread2{$ind}\n"; #### - } - print $threadfile "\n"; - truncate ($threadfile , tell($threadfile)); - print "saved.\n\n"; + if (($thread2{'timenumber'} ne '')and($thread2{'timenumber'}>$thread{'timenumber'})) { + print ("Newer version already saved.\n\n"); } else { - print "Failed seek $threadfile.\n\n"; + foreach my $ind (keys %thread) { + $thread2{$ind}=$thread{$ind}; + } + if ($thread2{'key'} eq '') { + $thread2{'key'} = key(KEY_BITS); + } + + if (seek($threadfile, 0, 0)) { + foreach my $ind (keys %thread2) { + print $threadfile "$ind: $thread2{$ind}\n"; + print "$ind: $thread2{$ind}\n"; #### + } + print $threadfile "\n"; + truncate ($threadfile , tell($threadfile)); + print "saved.\n\n"; + } + else { + print "Failed seek $threadfile.\n\n"; + } } } + else { + print "Can't lock $threadfile.\n\n"; + } close ($threadfile); } else @@ -317,7 +325,7 @@ sub processfile { } elsif (!$ignoretext) { if ($tag{'<'} eq 'a') { - if ($tag{'href'} =~ /^https?:\/\/([a-z0-9]+\.)?facebook\.com\/l\.php\?(.*&)?u=([^&]+)(&.*)?$/) { + if ($tag{'href'} =~ /^https?:\/\/([a-z0-9\.\-]+)?facebook\.com\/l\.php\?(.*&)?u=([^&]+)(&.*)?$/) { $thread{'postcontent'}.=''; $link=1; } @@ -347,7 +355,6 @@ sub processfile { } elsif ($mode eq 'thread-attachment') { - ############################################################### if ($tag{'<'} eq 'div') { ++$level; } @@ -359,34 +366,78 @@ sub processfile { $mode = 'thread'; } } - elsif (($tag{'<'} eq 'a') and ($tag{'href'} =~ /^\/photo\.php\?(.*&)?fbid=([0-9]+)(&.*)?$/)) { - ++$attnumber; - $thread{'img-'.$attnumber}=$2; - $mode = 'thread-attachment-img'; + elsif ($tag{'<'} eq 'a') { + if ($tag{'href'} =~ /^\/photo\.php\?(.*&)?fbid=([0-9]+)(&.*)?$/) { + ++$attnumber; + $thread{'img-'.$attnumber}=$2; + $mode = 'thread-attachment-img'; + } + elsif ($tag{'href'} =~ /^https?:\/\/([a-z0-9\.\-]+)?facebook\.com\/l\.php\?(.*&)?u=([^&]+)(&.*)?$/) { + ++$attnumber; + $thread{'link-'.$attnumber}=urldecode($3); + $mode = 'thread-attachment-link'; + } } } elsif ($mode eq 'thread-attachment-img') { if ($tag{'<'} eq 'img') { - # SAVE THE IMG HERE + my $imgkey = saveimg($tag{'src'},$thread{'img-'.$attnumber},$$settings{'id'}); + if ($imgkey ne '') { + $thread{'imgkey-'.$attnumber}=$imgkey; + } + else { + $thread{'img-'.$attnumber}=undef; + --$attnumber; + } } elsif ($tag{'<'} eq '/a') { $mode = 'thread-attachment'; } } + elsif ($mode eq 'thread-attachment-link') { + if($tag{'<'} eq 'h3'){ + $mode = 'thread-attachment-link-title'; + } + elsif (($tag{'<'} eq 'img')and($tag{'src'} =~ /^https?:\/\/([a-z0-9\.\-]+)?fbcdn\.net\/safe_image\.php\?(.*&)?url=([^&]+)(&.*)?$/)) { + my $imgurl = urldecode($3); + my $imgid=''; + $imgurl =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg; + + while(length($imgurl)>240) { + $imgid.=substr($imgurl,0,120).'-/'; + $imgurl=substr($imgurl,120); + } + $imgid.=$imgurl; + my $imgkey = saveimg($tag{'src'},$imgid,$$settings{'id'}); + if ($imgkey ne '') { + $thread{'img-'.$attnumber}=$imgid; + $thread{'imgkey-'.$attnumber}=$imgkey; + } + } + elsif ($tag{'<'} eq '/a') { + $mode = 'thread-attachment'; + } + } - + elsif ($mode eq 'thread-attachment-link-title') { + if ($tag{'<'} eq '/h3') { + $mode = 'thread-attachment-link'; + } + } if ($tag{"\\"} ne '') { $closetag = 1; next; } + local $/ = '<'; unless (defined ($text = <$contentfile>)) { close($contentfile); return; } + local $/ = "\n"; $text =~ s/<$//; if($mode eq 'thread-content') { @@ -397,6 +448,13 @@ sub processfile { elsif ($mode eq 'thread-time') { $thread{'timetext'}.=$text; } + elsif ($mode eq 'thread-attachment-link-title') { + $thread{'linktitle-'.$attnumber}.=$text; + } + elsif ($mode eq 'thread-attachment-link') { + $thread{'linktext-'.$attnumber}.=$text; + } + # print "text: $text\n"; } @@ -432,6 +490,7 @@ sub readheaderfile { while (defined(my $line = <$headerfile>)) { $line =~ s/[\r\n]$//g; + # print">> $line <<\n"; my $headname=''; my $headval=''; @@ -593,4 +652,147 @@ sub key { $keytext.=sprintf('%04x',int(rand(65536))); } return $keytext; +} + +sub saveimg { + (my $url, my $id, my $groupid) = @_; + + my $basepath; + my $imgpath; + my $headpath; + my $headopen=0;; + my $headfile; + my %header; + + my $archimgpath; + my $archheadpath; + my $archbasepath; + my %archheader; + + (my $prot, my $host, my $port, my $path, my $query); + my @stat; + + print " Image $id\n"; + + $basepath=ARCH_PATH.$groupid.'/image/'; + my $idtemp = $id; + while((my $ind = index($idtemp,'/'))>=0) { + $basepath.=substr($idtemp, 0, $ind+1); + $idtemp=substr($idtemp,$ind+1); + unless (-d $basepath){ + unless (mkdir $basepath) { + print " Can't mkdir $basepath.\n"; + return ''; + } + } + } + + $basepath.=$idtemp; + $headpath=$basepath.'@h'; + $imgpath=$basepath.'@v'; + + if ( -f $imgpath) { + if (open($headfile,"+<",$headpath)) { + if (flock ($headfile, 2)) { + %header = readheaderfile($headfile); + if ($header{'key'} ne '') { + print " Already saved.\n\n"; + close($headfile); + return $header{'key'}; + } + else { + $headopen = 1; + } + } + else { + print " Can't lock $headpath.\n\n"; + close($headfile); + return ''; + } + } + } + + ($prot, $host, $port, $path, $query) = divideurl($url); + $archbasepath = urldiv2path($prot, $host, $port, $path, $query); + $archheadpath = $archbasepath.'@h'; + $archimgpath = $archbasepath.'@v'; + print " url: $url\n"; + for (my $ind=0; $ind",$headpath)) { + print " Can't open $headpath.\n\n"; + return ''; + } + unless (flock ($headfile, 2)) { + print " Can't lock $headpath.\n\n"; + close($headfile); + return ''; + } + } + + foreach my $ind (keys %header) { + print $headfile "$ind: $header{$ind}\n"; + print " $ind: $header{$ind}\n"; + } + print $headfile "\n"; + close ($headfile); + + unless (copy($archimgpath,$imgpath)) { + print " Can't copy $archimgpath.\n\n"; + return ''; + } + print "saved.\n\n"; + return $header{'key'}; + } \ No newline at end of file diff --git a/configure.pl b/configure.pl index 909b262..acf692e 100644 --- a/configure.pl +++ b/configure.pl @@ -36,6 +36,9 @@ close ($configfile); $def{'PROXY_ARCH_PATH'} = "use constant PROXY_ARCH_PATH => '".$set{'proxy_data_path'}."archive/';"; $def{'ARCH_PATH'} = "use constant ARCH_PATH => '".$set{'data_path'}."group/';"; $def{'GROUPSETTINGS_PATH'}= "use constant GROUPSETTINGS_PATH => '".$set{'data_path'}."groupsettings/';"; +$def{'KEY_BITS'} = "use constant KEY_BITS => ".$set{'key_bits'}.";"; +$def{'MAX_REDIRECTIONS'} = "use constant MAX_REDIRECTIONS => ".$set{'max_redirections'}.";"; + $def{'PROXY_LIB'} = "use lib '".$set{'proxy_lib_path'}."';"; diff --git a/settings b/settings index c17f635..0b058ac 100644 --- a/settings +++ b/settings @@ -21,6 +21,9 @@ gcc = /usr/bin/gcc gzip = /bin/gzip c_flags = -g -Wall +key_bits = 256 +max_redirections = 16 + log_size_limit = 65536 # How big can a log file be logs_uncompressed = 2 # How many uncompressed old logs to keep logs_total = 10 # How many old logs to keep