bicyclesonthemoon.info Git - yplom/facebug1/commitdiff
Saving attached images, links in firstposts.
author b <b@7dec801f-c475-4e67-ba99-809552d69c55>
Tue, 8 Dec 2015 18:37:06 +0000 (18:37 +0000)
committer b <b@7dec801f-c475-4e67-ba99-809552d69c55>
Tue, 8 Dec 2015 18:37:06 +0000 (18:37 +0000)
git-svn-id: svn://botcastle1b/yplom/facebug1@5 7dec801f-c475-4e67-ba99-809552d69c55

bot.1.pl
configure.pl
settings

index 7cc072944f1353712c40ff64287309f37909a8da..eb9f75c2186a38f481ae8b8f0aad8f33bd8b1608 100644 (file)
--- a/bot.1.pl
+++ b/bot.1.pl
@@ -2,12 +2,15 @@
 
 use strict;
 use Fcntl;
+use File::Copy;
 ###PROXY_LIB;
-use proxy_lib qw(url2path path2urldiv getcgi divideurl readconfigfile entitydecode urldecode);
+use proxy_lib qw(url2path urldiv2path path2urldiv getcgi divideurl readconfigfile entitydecode urldecode);
 use POSIX qw(strftime);
 
 ###ARCH_PATH;
 ###GROUPSETTINGS_PATH;
+###KEY_BITS;
+###MAX_REDIRECTIONS;
 
 my $time = time();
 srand ($time-$$);
@@ -133,7 +136,7 @@ sub processfile {
                $id='';
        }
        
-       while () {
+       for (my $ind=0; $ind<MAX_REDIRECTIONS; ++$ind) {
                %header = readheaderfile($headerpath); 
                if ($header{'status'} =~ /^200 /) {
                        last;
@@ -162,7 +165,7 @@ sub processfile {
                        $timenumber='00000000000000';
                }
                else {
-                       $timenumber = strftime('%Y%m%d%h%M%S',gmtime($stat[9]));
+                       $timenumber = strftime('%Y%m%d%H%M%S',gmtime($stat[9]));
                }
        }
        
@@ -215,6 +218,7 @@ sub processfile {
                                # print "tag: $text\n";
                                %tag = taginfo($text);
                        }
+                       local $/ = "\n";
                        
                        if ($mode eq 'thread'){
                                if ($tag{'<'} eq 'h3') {
@@ -243,34 +247,38 @@ sub processfile {
                                        
                                        # if (open ($threadfile, "+<", $threadpath)) {
                                        if (sysopen ($threadfile, $threadpath, O_RDWR | O_CREAT)) {
-                                               # In the future it might be necessary to use a different function.
-                                               local $/ = "\n";
-                                               %thread2 = readheaderfile($threadfile);
-                                               
-                                               if (($thread2{'timenumber'} ne '')and($thread2{'timenumber'}>$thread{'timenumber'})) {
-                                                       print ("Newer version already saved.\n\n");
-                                               }
-                                               else {
-                                                       foreach my $ind (keys %thread) {
-                                                               $thread2{$ind}=$thread{$ind};
-                                                       }
-                                                       if ($thread2{'key'} eq '') {
-                                                               $thread2{'key'} = key(256);
-                                                       }
+                                               if (flock ($threadfile, 2)) {
+                                                       # In the future it might be necessary to use a different function.
+                                                       %thread2 = readheaderfile($threadfile);
                                                        
-                                                       if (seek($threadfile, 0, 0)) {
-                                                               foreach my $ind (keys %thread2) {
-                                                                       print $threadfile "$ind: $thread2{$ind}\n";
-                                                                       print "$ind: $thread2{$ind}\n"; ####
-                                                               }
-                                                               print $threadfile "\n";
-                                                               truncate ($threadfile , tell($threadfile));
-                                                               print "saved.\n\n";
+                                                       if (($thread2{'timenumber'} ne '')and($thread2{'timenumber'}>$thread{'timenumber'})) {
+                                                               print ("Newer version already saved.\n\n");
                                                        }
                                                        else {
-                                                               print "Failed seek $threadfile.\n\n";
+                                                               foreach my $ind (keys %thread) {
+                                                                       $thread2{$ind}=$thread{$ind};
+                                                               }
+                                                               if ($thread2{'key'} eq '') {
+                                                                       $thread2{'key'} = key(KEY_BITS);
+                                                               }
+                                                               
+                                                               if (seek($threadfile, 0, 0)) {
+                                                                       foreach my $ind (keys %thread2) {
+                                                                               print $threadfile "$ind: $thread2{$ind}\n";
+                                                                               print "$ind: $thread2{$ind}\n"; ####
+                                                                       }
+                                                                       print $threadfile "\n";
+                                                                       truncate ($threadfile , tell($threadfile));
+                                                                       print "saved.\n\n";
+                                                               }
+                                                               else {
+                                                                       print "Failed seek $threadfile.\n\n";
+                                                               }
                                                        }
                                                }
+                                               else {
+                                                       print "Can't lock $threadfile.\n\n";
+                                               }
                                                close ($threadfile);
                                        }
                                        else
@@ -317,7 +325,7 @@ sub processfile {
                                }
                                elsif (!$ignoretext) {
                                        if ($tag{'<'} eq 'a') {
-                                               if ($tag{'href'} =~ /^https?:\/\/([a-z0-9]+\.)?facebook\.com\/l\.php\?(.*&)?u=([^&]+)(&.*)?$/) {
+                                               if ($tag{'href'} =~ /^https?:\/\/([a-z0-9\.\-]+)?facebook\.com\/l\.php\?(.*&)?u=([^&]+)(&.*)?$/) {
                                                        $thread{'postcontent'}.='<a href="'.urldecode($3).'">';
                                                        $link=1;
                                                }
@@ -347,7 +355,6 @@ sub processfile {
                        }
                        
                        elsif ($mode eq 'thread-attachment') {
-                               ###############################################################
                                if ($tag{'<'} eq 'div') {
                                        ++$level;
                                }
@@ -359,34 +366,78 @@ sub processfile {
                                                $mode = 'thread';
                                        }
                                }
-                               elsif (($tag{'<'} eq 'a') and ($tag{'href'} =~ /^\/photo\.php\?(.*&)?fbid=([0-9]+)(&.*)?$/)) {
-                                       ++$attnumber;
-                                       $thread{'img-'.$attnumber}=$2;
-                                       $mode = 'thread-attachment-img';
+                               elsif ($tag{'<'} eq 'a') {
+                                       if ($tag{'href'} =~ /^\/photo\.php\?(.*&)?fbid=([0-9]+)(&.*)?$/) {
+                                               ++$attnumber;
+                                               $thread{'img-'.$attnumber}=$2;
+                                               $mode = 'thread-attachment-img';
+                                       }
+                                       elsif ($tag{'href'} =~ /^https?:\/\/([a-z0-9\.\-]+)?facebook\.com\/l\.php\?(.*&)?u=([^&]+)(&.*)?$/) {
+                                               ++$attnumber;
+                                               $thread{'link-'.$attnumber}=urldecode($3);
+                                               $mode = 'thread-attachment-link';
+                                       }
                                }
                        }
                        
                        elsif ($mode eq 'thread-attachment-img') {
                                if ($tag{'<'} eq 'img') {
-                                       # SAVE THE IMG HERE
+                                       my $imgkey = saveimg($tag{'src'},$thread{'img-'.$attnumber},$$settings{'id'});
+                                       if ($imgkey ne '') {
+                                               $thread{'imgkey-'.$attnumber}=$imgkey;
+                                       }
+                                       else {
+                                               $thread{'img-'.$attnumber}=undef;
+                                               --$attnumber;
+                                       }
                                }
                                elsif ($tag{'<'} eq '/a') {
                                        $mode = 'thread-attachment';
                                }
                        }
                        
+                       elsif ($mode eq 'thread-attachment-link') {
+                               if($tag{'<'} eq 'h3'){
+                                       $mode = 'thread-attachment-link-title';
+                               }
+                               elsif (($tag{'<'} eq 'img')and($tag{'src'} =~ /^https?:\/\/([a-z0-9\.\-]+)?fbcdn\.net\/safe_image\.php\?(.*&)?url=([^&]+)(&.*)?$/)) {
+                                       my $imgurl = urldecode($3);
+                                       my $imgid='';
+                                       $imgurl =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg;
+                                       
+                                       while(length($imgurl)>240) {
+                                               $imgid.=substr($imgurl,0,120).'-/';
+                                               $imgurl=substr($imgurl,120);
+                                       }
+                                       $imgid.=$imgurl;
+                                       my $imgkey = saveimg($tag{'src'},$imgid,$$settings{'id'});
+                                       if ($imgkey ne '') {
+                                               $thread{'img-'.$attnumber}=$imgid;
+                                               $thread{'imgkey-'.$attnumber}=$imgkey;
+                                       }
+                               }
+                               elsif ($tag{'<'} eq '/a') {
+                                       $mode = 'thread-attachment';
+                               }
+                       }
                        
-                       
+                       elsif ($mode eq 'thread-attachment-link-title') {
+                               if ($tag{'<'} eq '/h3') {
+                                       $mode = 'thread-attachment-link';
+                               }
+                       }
                        if ($tag{"\\"} ne '') {
                                $closetag = 1;
                                next;
                        }
                        
+                       
                        local $/ = '<';
                        unless (defined ($text = <$contentfile>)) {
                                close($contentfile);
                                return;
                        }
+                       local $/ = "\n";
                        $text =~ s/<$//;
                        
                        if($mode eq 'thread-content') {
@@ -397,6 +448,13 @@ sub processfile {
                        elsif ($mode eq 'thread-time') {
                                $thread{'timetext'}.=$text;
                        }
+                       elsif ($mode eq 'thread-attachment-link-title') {
+                               $thread{'linktitle-'.$attnumber}.=$text;
+                       }
+                       elsif ($mode eq 'thread-attachment-link') {
+                               $thread{'linktext-'.$attnumber}.=$text;
+                       }
+                       
                        # print "text: $text\n";
                }
                
@@ -432,6 +490,7 @@ sub readheaderfile {
        
        while (defined(my $line = <$headerfile>)) {
                $line =~ s/[\r\n]$//g;
+               # print">> $line <<\n";
                my $headname='';
                my $headval='';
                
@@ -593,4 +652,147 @@ sub key {
                $keytext.=sprintf('%04x',int(rand(65536)));
        }
        return $keytext;
+}
+
+# Save one attached image into the group archive and return its access key.
+#
+# Arguments:
+#   $url     - URL of the image as found in the page; it is mapped to a file
+#              in the proxy archive with divideurl() and urldiv2path().
+#   $id      - image identifier; every '/' in it becomes a subdirectory.
+#   $groupid - group identifier; files go under ARCH_PATH.$groupid.'/image/'
+#              (that directory itself is not created here).
+#
+# Two files are written: <path>@h (header fields, one "name: value" per line,
+# ended by an empty line) and <path>@v (the image data).
+#
+# Returns the key stored in the header, or '' when the image could not be
+# saved. If the image is already archived with a key, that key is returned
+# and nothing is rewritten.
+sub saveimg {
+       (my $url, my $id, my $groupid) = @_;
+       
+       my $basepath;
+       my $imgpath;
+       my $headpath;
+       my $headopen=0; # 1 while an existing header file is open and locked
+       my $headfile;
+       my %header;
+       
+       my $archimgpath;
+       my $archheadpath;
+       my $archbasepath;
+       my %archheader;
+       
+       (my $prot, my $host, my $port, my $path, my $query);
+       my @stat;
+       
+       print "  Image $id\n";
+       
+       # Build the target path, creating one directory per '/' found in the id.
+       $basepath=ARCH_PATH.$groupid.'/image/';
+       my $idtemp = $id;
+       while((my $ind = index($idtemp,'/'))>=0) {
+               $basepath.=substr($idtemp, 0, $ind+1);
+               $idtemp=substr($idtemp,$ind+1);
+               unless (-d $basepath){
+                       unless (mkdir $basepath) {
+                               print "  Can't mkdir $basepath.\n";
+                               return '';
+                       }
+               }
+       }
+       
+       $basepath.=$idtemp;
+       $headpath=$basepath.'@h';
+       $imgpath=$basepath.'@v';
+       
+       # Image already present: reuse its key if the header has one, otherwise
+       # keep the header open (and locked) so it can be rewritten below.
+       if ( -f $imgpath) {
+               if (open($headfile,"+<",$headpath)) {
+                       if (flock ($headfile, 2)) {
+                               %header = readheaderfile($headfile);
+                               if ($header{'key'} ne '') {
+                                       print "  Already saved.\n\n";
+                                       close($headfile);
+                                       return $header{'key'};
+                               }
+                               else {
+                                       $headopen = 1;
+                               }
+                       }
+                       else {
+                               print "  Can't lock $headpath.\n\n";
+                               close($headfile);
+                               return '';
+                       }
+               }
+       }
+       
+       # Locate the source in the proxy archive, following stored redirects.
+       ($prot, $host, $port, $path, $query) = divideurl($url);
+       $archbasepath = urldiv2path($prot, $host, $port, $path, $query);
+       $archheadpath = $archbasepath.'@h';
+       $archimgpath = $archbasepath.'@v';
+       print "  url: $url\n";
+       for (my $ind=0; $ind<MAX_REDIRECTIONS; ++$ind) {
+               %archheader = readheaderfile($archheadpath); 
+               
+               if ($archheader{'status'} =~ /^30[1237] /) {
+                       my $location;
+                       unless (defined($location = $archheader{'location'})) {
+                               last;
+                       }
+                       # A location without a scheme is treated as a path on the same host.
+                       if ($location !~ /^[a-z]+:\/\//) {
+                               $location = $prot.'://'.$host.(($port ne '')?(':'.$port):'').$location;
+                       }
+                       print "  Redirect: $location\n";
+                       ($prot, $host, $port, $path, $query) = divideurl($location);
+                       $archbasepath = urldiv2path($prot, $host, $port, $path, $query);
+                       $archheadpath = $archbasepath.'@h';
+                       $archimgpath = $archbasepath.'@v';
+               }
+               else {
+                       last;
+               }
+       }
+       if ($archheader{'status'} !~ /^200 /) {
+               print "  Not found ($url $archheader{'status'}).\n\n";
+               if ($headopen) {
+                       close ($headfile);
+               }
+               return '';
+       }
+       
+       # Only these two fields are taken over from the archived response header.
+       foreach my $ind (keys %archheader) {
+               if ($ind =~ /^(content-type|content-disposition)$/) {
+                       $header{$ind}=$archheader{$ind};
+               }
+       }
+       $header{'key'}=key(KEY_BITS);
+       if (@stat = stat($archimgpath)){
+               $header{'content-length'}=$stat[7];
+       }
+       else {
+               # Report the file that was actually stat'ed; @stat is empty here.
+               print "  Can't stat $archimgpath.\n";
+       }
+       $header{'id'}=$id;
+       $header{'groupid'}=$groupid;
+       $header{'timenumber'}=strftime('%Y%m%d%H%M%S',gmtime(time()));
+       
+       if ($headopen) {
+               unless (seek($headfile,0,0)) {
+                       print "  Can't seek $headpath.\n\n";
+                       close ($headfile);
+                       return '';
+               }
+       }
+       else{
+               unless (open($headfile,">",$headpath)) {
+                       print "  Can't open $headpath.\n\n";
+                       return '';
+               }
+               unless (flock ($headfile, 2)) {
+                       print "  Can't lock $headpath.\n\n";
+                       close($headfile);
+                       return '';
+               }
+       }
+       
+       foreach my $ind (keys %header) {
+               print $headfile "$ind: $header{$ind}\n";
+               print "  $ind: $header{$ind}\n";
+       }
+       print $headfile "\n";
+       # When an existing header was rewritten in place, drop whatever is left
+       # of the old, possibly longer, content.
+       truncate ($headfile, tell($headfile));
+       # Buffered write errors only show up at close.
+       unless (close ($headfile)) {
+               print "  Can't write $headpath.\n\n";
+               return '';
+       }
+       
+       unless (copy($archimgpath,$imgpath)) {
+               print "  Can't copy $archimgpath.\n\n";
+               # Do not leave a header with a key next to a missing or partial
+               # image, or the next run would report it as already saved.
+               unlink ($headpath);
+               return '';
+       }
+       print "saved.\n\n";
+       return $header{'key'};
+       
 }
\ No newline at end of file
index 909b26299e99fd08cd66fe54ad0faa147d4826a0..acf692e462727f26ce4d0e123d94fb3056f7cb49 100644 (file)
@@ -36,6 +36,9 @@ close ($configfile);
 $def{'PROXY_ARCH_PATH'}   = "use constant PROXY_ARCH_PATH    => '".$set{'proxy_data_path'}."archive/';";
 $def{'ARCH_PATH'}         = "use constant ARCH_PATH          => '".$set{'data_path'}."group/';";
 $def{'GROUPSETTINGS_PATH'}= "use constant GROUPSETTINGS_PATH => '".$set{'data_path'}."groupsettings/';";
+$def{'KEY_BITS'}          = "use constant KEY_BITS           => ".$set{'key_bits'}.";";
+$def{'MAX_REDIRECTIONS'}  = "use constant MAX_REDIRECTIONS   => ".$set{'max_redirections'}.";";
+
 
 $def{'PROXY_LIB'} = "use lib '".$set{'proxy_lib_path'}."';";
 
index c17f635e42dd79cf968d738a8d9ea6a94e6f2144..0b058acda9059ce06ab38c84be0a2e10d3ddd0aa 100644 (file)
--- a/settings
+++ b/settings
@@ -21,6 +21,9 @@ gcc     = /usr/bin/gcc
 gzip    = /bin/gzip
 c_flags = -g -Wall
 
+key_bits = 256
+max_redirections = 16
+
 log_size_limit    = 65536 # How big can a log file be
 logs_uncompressed =     2 # How many uncompressed old logs to keep
 logs_total        =    10 # How many old logs to keep