bicyclesonthemoon.info Git - yplom/facebug1/commitdiff
readheaderfile() moved to proxy library.
author b <b@7dec801f-c475-4e67-ba99-809552d69c55>
Wed, 16 Dec 2015 20:54:14 +0000 (20:54 +0000)
committer b <b@7dec801f-c475-4e67-ba99-809552d69c55>
Wed, 16 Dec 2015 20:54:14 +0000 (20:54 +0000)
can process all 3 types of facebook pages now.

git-svn-id: svn://botcastle1b/yplom/facebug1@8 7dec801f-c475-4e67-ba99-809552d69c55

bot.1.pl

index 82f50ae5a6cfee998f3e1e76c4c0812faca4bacd..de81ef39a27764ef7aca33b48a25f0e656d0f3e5 100644 (file)
--- a/bot.1.pl
+++ b/bot.1.pl
@@ -4,7 +4,7 @@ use strict;
 use Fcntl;
 use File::Copy;
 ###PROXY_LIB;
-use proxy_lib qw(url2path urldiv2path path2urldiv getcgi divideurl joinurl readconfigfile entitydecode urldecode);
+use proxy_lib qw(url2path urldiv2path path2urldiv getcgi divideurl joinurl readconfigfile entitydecode urldecode readheaderfile);
 use POSIX qw(strftime);
 
 ###ARCH_PATH;
@@ -64,18 +64,24 @@ sub processgroup {
                return;
        }
        
-       $archpath = url2path('https://m.facebook.com/groups/'.$groupid).'@q/';
-       
        print "Group $groupid\n";
        
-       processdir($archpath,$groupid,\%settings,\%names);
+       $archpath = url2path('https://m.facebook.com/groups/'.$groupid);
+       processdir($archpath.'@q/',\%settings,\%names,0);
+       processdir($archpath.'/@q/',\%settings,\%names,0);
+       
+       $archpath = url2path('https://m.facebook.com/comment/replies');
+       processdir($archpath.'@q/',\%settings,\%names,1);
+       processdir($archpath.'/@q/',\%settings,\%names,1);
+       
 }
 
 sub processdir {
-       (my $dirpath, my $groupid, my $settings, my $names) = @_;
+       (my $dirpath, my $settings, my $names, my $pagemode) = @_;
        my $dir;
        my $subpath;
        my $subpathfull;
+       print "Dir $dirpath\n";
        
        unless ( opendir ($dir, $dirpath)) {
                return;
@@ -86,17 +92,17 @@ sub processdir {
                        next;
                }
                if (-f $subpathfull) {
-                       processfile($subpathfull, $groupid, $settings, $names);
+                       processfile($subpathfull, $settings, $names, $pagemode);
                }
                elsif (-d $subpathfull) {
-                       processdir($subpathfull.'/', $groupid, $settings, $names);
+                       processdir($subpathfull.'/', $settings, $names, $pagemode);
                }
        }
        closedir ($dir);        
 }
 
 sub processfile {
-       (my $headerpath, my $groupid, my $settings, my $names) = @_;
+       (my $headerpath, my $settings, my $names, my $pagemode) = @_;
        my $basepath;
        my $contentpath;
        my $contentfile;
@@ -111,7 +117,9 @@ sub processfile {
        
        my %cgi;
        
-       my $id;
+       my $postid;
+       my $threadid;
+       my $groupid=0; ###!
        my $timenumber;
        
        my %thread;
@@ -137,18 +145,38 @@ sub processfile {
        ### REDESIGN THE CONDITIONS!
        if($query ne '') {
                %cgi=getcgi($query);
-               $id = $cgi{'id'};
-               if ($id =~ /^[0-9]+$/) {
-                       $pagetype='thread';
+               if($pagemode) {
+                       $postid = $cgi{'ctoken'};
+                       if ($postid =~ /^([0-9]+)_([0-9]+)$/) {
+                               $threadid = $1;
+                               $postid = $2;
+                               $pagetype = 'post';
+                       }
+                       else {
+                               return;
+                       }
                }
-               else {
-                       $pagetype = 'group';
+               else{
+                       $threadid = $cgi{'id'};
+                       if ($threadid =~ /^([0-9]+)$/) {
+                               $threadid = $1;
+                               $pagetype='thread';
+                       }
+                       else {
+                               $pagetype = 'group';
+                       }
                }
        }
        else {
-               $id='';
-               $pagetype = 'group';
+               if($pagemode) {
+                       return;
+               }
+               else {
+                       $threadid='';
+                       $pagetype = 'group';
+               }
        }
+       print " type=$pagetype\n";
        
        for (my $ind=0; $ind<MAX_REDIRECTIONS; ++$ind) {
                %header = readheaderfile($headerpath); 
@@ -183,7 +211,7 @@ sub processfile {
                }
        }
        
-       # REDESIGN THE CONDITIONS!
+       # This condition is redundant now.
        if ($pagetype) {
                
                
@@ -203,18 +231,25 @@ sub processfile {
                my $hidename;
                my $attnumber;
                my $incomplete;
+               my $firstpost;
                
                if ($pagetype eq 'thread') {
-                       print "Thread $id\n";
+                       print "Thread $threadid\n";
                        
-                       $thread{'id'}=$id;
-                       $thread{'groupid'}=$groupid;
+                       $thread{'id'}=$threadid;
+                       $thread{'groupid'}=$$settings{'id'};
                        $thread{'timenumber'}=$timenumber;
                        $mode = 'thread';
                        $level=0;
                        $attnumber=0;
                        $incomplete=0;
                }
+               elsif ($pagetype eq 'post'){
+                       print "Post $postid ($threadid)\n"; 
+                       
+                       $mode='posts';
+                       $firstpost=1;
+               }
                else { #group
                        print "Threads\n";
                        $mode = 'threads';
@@ -227,11 +262,11 @@ sub processfile {
                        close($contentfile);
                        return;
                }
-               while () {
+               while ($mode ne '') {
                        if ($closetag){
                                $tag{'<'} = '/'.$tag{'<'};
                                $tag{'/'}='/';
-                               $tag{"\\"}=undef;
+                               delete $tag{"\\"};
                                $closetag=0;
                        }
                        else {
@@ -254,7 +289,7 @@ sub processfile {
                                        print "Thread [$1]\n";
                                        $mode = 'thread';
                                        %thread = ();
-                                       $thread{'groupid'}=$groupid;
+                                       $thread{'groupid'}=$$settings{'id'};
                                        $thread{'timenumber2'}=$timenumber;
                                        $level = 0;
                                        $attnumber=0;
@@ -577,14 +612,39 @@ sub processfile {
                                        %post = ();
                                        
                                        $post{'id'} = $1;
-                                       $post{'threadid'} = $id;
+                                       $post{'threadid'} = $threadid;
                                        $post{'groupid'} = $$settings{'id'};
                                        $post{'timenumber'} = $timenumber;
                                        
                                        $mode = 'post';
                                        $level=0;
                                        $attnumber=0;
-                                       print "Post $post{'id'}\n";
+                                       
+                                       if($pagetype eq 'post') {
+                                               if(!$groupid) {
+                                                       print "Can't determine if post belongs to group $$settings{'id'}.\n";
+                                                       $mode='';
+                                                       last;
+                                               }
+                                               elsif($post{'id'} eq $postid) {
+                                                       $firstpost=1;
+                                               }
+                                               else {
+                                                       $firstpost=0;
+                                                       $post{'postid'} = $postid;
+                                               }
+                                       }
+                                       print "Post ".((($pagetype eq 'post') and !$firstpost)?"$post{'postid'}/":"")."$post{'id'}\n";
+                               }
+                               elsif (($tag{'<'} eq 'a') and ($pagetype eq 'post') and ($tag{'href'} =~ /^\/groups\/([0-9]+)\/?\?/)) {
+                                       if ($1 eq $$settings{'id'}) {
+                                               $groupid = 1;
+                                       }
+                                       else {
+                                               print "Post does not belong to group $$settings{'id'}.\n";
+                                               $mode = '';
+                                               last;
+                                       }
                                }
                        }
                        
@@ -621,18 +681,32 @@ sub processfile {
                                                                print "Can't mkdir $postpath.\n";
                                                        }
                                                }
-                                               $postpath.='post/';
+                                               if(($pagemode eq 'post')and !$firstpost){
+                                                       $postpath.='postreply/';
+                                               }
+                                               else {
+                                                       $postpath.='post/';
+                                               }
                                                unless (-d $postpath) {
                                                        unless (mkdir $postpath) {
                                                                print "Can't mkdir $postpath.\n";
                                                        }
                                                }
-                                               $postpath.=$thread{'id'}.'/';
+                                               $postpath.=$post{'threadid'}.'/';
                                                unless (-d $postpath) {
                                                        unless (mkdir $postpath) {
                                                                print "Can't mkdir $postpath.\n";
                                                        }
                                                }
+                                               if(($pagemode eq 'post')and !$firstpost){
+                                                       $postpath.=$post{'postid'}.'/';
+                                                       unless (-d $postpath) {
+                                                               unless (mkdir $postpath) {
+                                                                       print "Can't mkdir $postpath.\n";
+                                                               }
+                                                       }
+                                               }
+                                               
                                                $postpath.=$post{'id'};
                                                
                                                if (sysopen ($postfile, $postpath, O_RDWR | O_CREAT)) {
@@ -745,6 +819,9 @@ sub processfile {
                                                $mode = 'post';
                                        }
                                }
+                               elsif ($tag{'<'} eq 'br') {
+                                       $post{'content'}.='<br>';
+                               }
                                elsif ($tag{'<'} eq 'p') {
                                        $post{'content'}.='<p>';
                                }
@@ -875,112 +952,83 @@ sub processfile {
        }
 }
 
-sub readheaderfile {
-       (my $headerpath) = @_;
-       my $headerfile;
-       my %header;
-       
-       if(ref($headerpath)) {
-               $headerfile=$headerpath;
-       }
-       else {
-               unless (open ($headerfile, "<", $headerpath)) {
-                       return %header;
-               }
-       }
-       
-       my $lastname='';
-       
-       while (defined(my $line = <$headerfile>)) {
-               $line =~ s/[\r\n]$//g;
-               # print">> $line <<\n";
-               my $headname='';
-               my $headval='';
-               
-               if($line =~ /^[ \t]+([^ \t](.*[^ \t])?)[ \t]*$/){
-                       if($lastname ne '') {
-                               $header{$lastname}.=$1;
-                       }
-               }
-               elsif ($line =~ /^([^:]+):[ \t]*([^ \t](.*[^ \t])?)[ \t]*$/) {
-                       $headname = lc($1);
-                       $headval = $2;
-                       
-                       if ($header{$headname} ne '') {
-                               $header{$headname}.=', '.$headval;
-                       }
-                       else {
-                               $header{$headname}=$headval;
-                       }
-                       $lastname = $headname;
-               }
-       }
-       
-       unless (ref($headerpath)) {
-               close ($headerfile);
-       }
-       
-       return %header;
-}
-
-# Very similar to header file reading.
+# Function to read data from datafiles.
+# Very similar to http header file reading. (function readheaderfile() in proxy
+# library)
+# 
 # Differences:
 #
-# After field name and colon there must be exactly one whitespace (space or
+# 1. After field name and colon there must be exactly one whitespace (space or
 # tab). Any other leading or trailing whitespace (but not the newline character
 # at the end of the line) is treated as part of the field value.
 #
-# When header field is split into multiple lines the next lines must start with
-# exactly one whitespace (tab or space) Any other leading or trailing whitespace
-# (but not the newline character at the end of the line) is treated as part of
-# the field value. the lines will be joined with a newline between them.
+# 2. When header field is split into multiple lines the next lines must start
+# with exactly one whitespace (tab or space). Any other leading or trailing
+# whitespace (but not the newline character at the end of the line) is treated
+# as part of the field value. The lines will be joined with a newline between
+# them.
+#
+# 3. When the same field name appears it replaces the previous one.
+# 
+# 4. Line separator is LF and not CR LF. The CR character is treated as part of
+# the field value.
 #
-# When the same field name appears it replaces the previous one.
+# Returns a hash containing the values.
+# Names are case insensitive: they are converted to lowercase.
+#
+# Argument can be a path or a file handle. In case of a file handle it will just
+# read the file. In case of path it opens the file before reading and closes
+# after. On failure (file not open) returns empty hash.
+# 
 sub readdatafile {
-       (my $headerpath) = @_;
-       my $headerfile;
-       my %header;
+       (my $datapath) = @_;
+       my $datafile;
+       my %data;
        
-       if(ref($headerpath)) {
-               $headerfile=$headerpath;
+       # check if $datapath is actually a path or maybe a filehandle
+       # filehandles are references.
+       if(ref($datapath)) {
+               $datafile=$datapath;
        }
        else {
-               unless (open ($headerfile, "<", $headerpath)) {
-                       return %header;
+               unless (open ($datafile, "<", $datapath)) {
+                       return %data;
                }
        }
-       
+
+       # The name of header field in previous line. Required for header fields that
+       # occupy multiple lines.
        my $lastname='';
        
-       while (defined(my $line = <$headerfile>)) {
-               $line =~ s/[\r\n]$//g;
-               my $headname='';
-               my $headval='';
+       while (defined(my $line = <$datafile>)) {
+               $line =~ s/[\n]$//g;
+               my $name='';
+               my $value='';
                
+               # Line starts with whitespace. It's a continuation of the previous line.
+               # Concatenate the field value, separated by newline.
                if($line =~ /^[ \t](.*)$/){
                        if($lastname ne '') {
-                               $header{$lastname}.="\n".$1;
+                               $data{$lastname}.="\n".$1;
                        }
                }
+               # Line starts with a name followed by colon. Save the value
                elsif ($line =~ /^([^:]+):[ \t](.*)$/) {
-                       $headname = lc($1);
-                       $headval = $2;
+                       $name = lc($1);
+                       $value = $2;
                        
-                       # if ($header{$headname} ne '') {
-                               # $header{$headname}.=$headval;
-                       # }
-                       # else {
-                               $header{$headname}=$headval;
-                       # }
-                       $lastname = $headname;
+                       $data{$name}=$value;
+                       
+                       $lastname = $name;
                }
        }
        
-       unless (ref($headerpath)) {
-               close ($headerfile);
+       # If argument was a path the file must be closed. 
+       unless (ref($datapath)) {
+               close ($datafile);
        }
        
-       return %header;
+       return %data;
 }
 
 sub writedatafile {