readheaderfile() moved to proxy library.

author b <b@7dec801f-c475-4e67-ba99-809552d69c55>

Wed, 16 Dec 2015 20:54:14 +0000 (20:54 +0000)

committer b <b@7dec801f-c475-4e67-ba99-809552d69c55>

Wed, 16 Dec 2015 20:54:14 +0000 (20:54 +0000)
author b <b@7dec801f-c475-4e67-ba99-809552d69c55>
Wed, 16 Dec 2015 20:54:14 +0000 (20:54 +0000)
committer b <b@7dec801f-c475-4e67-ba99-809552d69c55>
Wed, 16 Dec 2015 20:54:14 +0000 (20:54 +0000)
diff --git a/bot.1.pl b/bot.1.pl

index 82f50ae5a6cfee998f3e1e76c4c0812faca4bacd..de81ef39a27764ef7aca33b48a25f0e656d0f3e5 100644 (file)
--- a/bot.1.pl
+++ b/bot.1.pl
@@ -4,7 +4,7 @@ use strict;
  use Fcntl;
  use File::Copy;
  ###PROXY_LIB;
-use proxy_lib qw(url2path urldiv2path path2urldiv getcgi divideurl joinurl readconfigfile entitydecode urldecode);
+use proxy_lib qw(url2path urldiv2path path2urldiv getcgi divideurl joinurl readconfigfile entitydecode urldecode readheaderfile);
  use POSIX qw(strftime);
  
  ###ARCH_PATH;
@@ -64,18 +64,24 @@ sub processgroup {
                 return;
         }
         
-       $archpath = url2path('https://m.facebook.com/groups/'.$groupid).'@q/';
-       
         print "Group $groupid\n";
         
-       processdir($archpath,$groupid,\%settings,\%names);
+       $archpath = url2path('https://m.facebook.com/groups/'.$groupid);
+       processdir($archpath.'@q/',\%settings,\%names,0);
+       processdir($archpath.'/@q/',\%settings,\%names,0);
+       
+       $archpath = url2path('https://m.facebook.com/comment/replies');
+       processdir($archpath.'@q/',\%settings,\%names,1);
+       processdir($archpath.'/@q/',\%settings,\%names,1);
+       
  }
  
  sub processdir {
-       (my $dirpath, my $groupid, my $settings, my $names) = @_;
+       (my $dirpath, my $settings, my $names, my $pagemode) = @_;
         my $dir;
         my $subpath;
         my $subpathfull;
+       print "Dir $dirpath\n";
         
         unless ( opendir ($dir, $dirpath)) {
                 return;
@@ -86,17 +92,17 @@ sub processdir {
                         next;
                 }
                 if (-f $subpathfull) {
-                       processfile($subpathfull, $groupid, $settings, $names);
+                       processfile($subpathfull, $settings, $names, $pagemode);
                 }
                 elsif (-d $subpathfull) {
-                       processdir($subpathfull.'/', $groupid, $settings, $names);
+                       processdir($subpathfull.'/', $settings, $names, $pagemode);
                 }
         }
         closedir ($dir);        
  }
  
  sub processfile {
-       (my $headerpath, my $groupid, my $settings, my $names) = @_;
+       (my $headerpath, my $settings, my $names, my $pagemode) = @_;
         my $basepath;
         my $contentpath;
         my $contentfile;
@@ -111,7 +117,9 @@ sub processfile {
         
         my %cgi;
         
-       my $id;
+       my $postid;
+       my $threadid;
+       my $groupid=0; ###!
         my $timenumber;
         
         my %thread;
@@ -137,18 +145,38 @@ sub processfile {
         ### REDESIGN THE CONDITIONS!
         if($query ne '') {
                 %cgi=getcgi($query);
-               $id = $cgi{'id'};
-               if ($id =~ /^[0-9]+$/) {
-                       $pagetype='thread';
+               if($pagemode) {
+                       $postid = $cgi{'ctoken'};
+                       if ($postid =~ /^([0-9]+)_([0-9]+)$/) {
+                               $threadid = $1;
+                               $postid = $2;
+                               $pagetype = 'post';
+                       }
+                       else {
+                               return;
+                       }
                 }
-               else {
-                       $pagetype = 'group';
+               else{
+                       $threadid = $cgi{'id'};
+                       if ($threadid =~ /^([0-9]+)$/) {
+                               $threadid = $1;
+                               $pagetype='thread';
+                       }
+                       else {
+                               $pagetype = 'group';
+                       }
                 }
         }
         else {
-               $id='';
-               $pagetype = 'group';
+               if($pagemode) {
+                       return;
+               }
+               else {
+                       $threadid='';
+                       $pagetype = 'group';
+               }
         }
+       print " type=$pagetype\n";
         
         for (my $ind=0; $ind<MAX_REDIRECTIONS; ++$ind) {
                 %header = readheaderfile($headerpath); 
@@ -183,7 +211,7 @@ sub processfile {
                 }
         }
         
-       # REDESIGN THE CONDITIONS!
+       # This condition is redundant now.
         if ($pagetype) {
                 
                 
@@ -203,18 +231,25 @@ sub processfile {
                 my $hidename;
                 my $attnumber;
                 my $incomplete;
+               my $firstpost;
                 
                 if ($pagetype eq 'thread') {
-                       print "Thread $id\n";
+                       print "Thread $threadid\n";
                         
-                       $thread{'id'}=$id;
-                       $thread{'groupid'}=$groupid;
+                       $thread{'id'}=$threadid;
+                       $thread{'groupid'}=$$settings{'id'};
                         $thread{'timenumber'}=$timenumber;
                         $mode = 'thread';
                         $level=0;
                         $attnumber=0;
                         $incomplete=0;
                 }
+               elsif ($pagetype eq 'post'){
+                       print "Post $postid ($threadid)\n"; 
+                       
+                       $mode='posts';
+                       $firstpost=1;
+               }
                 else { #group
                         print "Threads\n";
                         $mode = 'threads';
@@ -227,11 +262,11 @@ sub processfile {
                         close($contentfile);
                         return;
                 }
-               while () {
+               while ($mode ne '') {
                         if ($closetag){
                                 $tag{'<'} = '/'.$tag{'<'};
                                 $tag{'/'}='/';
-                               $tag{"\\"}=undef;
+                               delete $tag{"\\"};
                                 $closetag=0;
                         }
                         else {
@@ -254,7 +289,7 @@ sub processfile {
                                         print "Thread [$1]\n";
                                         $mode = 'thread';
                                         %thread = ();
-                                       $thread{'groupid'}=$groupid;
+                                       $thread{'groupid'}=$$settings{'id'};
                                         $thread{'timenumber2'}=$timenumber;
                                         $level = 0;
                                         $attnumber=0;
@@ -577,14 +612,39 @@ sub processfile {
                                         %post = ();
                                         
                                         $post{'id'} = $1;
-                                       $post{'threadid'} = $id;
+                                       $post{'threadid'} = $threadid;
                                         $post{'groupid'} = $$settings{'id'};
                                         $post{'timenumber'} = $timenumber;
                                         
                                         $mode = 'post';
                                         $level=0;
                                         $attnumber=0;
-                                       print "Post $post{'id'}\n";
+                                       
+                                       if($pagetype eq 'post') {
+                                               if(!$groupid) {
+                                                       print "Can't determine if post belongs to group $$settings{'id'}.\n";
+                                                       $mode='';
+                                                       last;
+                                               }
+                                               elsif($post{'id'} eq $postid) {
+                                                       $firstpost=1;
+                                               }
+                                               else {
+                                                       $firstpost=0;
+                                                       $post{'postid'} = $postid;
+                                               }
+                                       }
+                                       print "Post ".((($pagetype eq 'post') and !$firstpost)?"$post{'postid'}/":"")."$post{'id'}\n";
+                               }
+                               elsif (($tag{'<'} eq 'a') and ($pagetype eq 'post') and ($tag{'href'} =~ /^\/groups\/([0-9]+)\/?\?/)) {
+                                       if ($1 eq $$settings{'id'}) {
+                                               $groupid = 1;
+                                       }
+                                       else {
+                                               print "Post does not belong to group $$settings{'id'}.\n";
+                                               $mode = '';
+                                               last;
+                                       }
                                 }
                         }
                         
@@ -621,18 +681,32 @@ sub processfile {
                                                                 print "Can't mkdir $postpath.\n";
                                                         }
                                                 }
-                                               $postpath.='post/';
+                                               if(($pagemode eq 'post')and !$firstpost){
+                                                       $postpath.='postreply/';
+                                               }
+                                               else {
+                                                       $postpath.='post/';
+                                               }
                                                 unless (-d $postpath) {
                                                         unless (mkdir $postpath) {
                                                                 print "Can't mkdir $postpath.\n";
                                                         }
                                                 }
-                                               $postpath.=$thread{'id'}.'/';
+                                               $postpath.=$post{'threadid'}.'/';
                                                 unless (-d $postpath) {
                                                         unless (mkdir $postpath) {
                                                                 print "Can't mkdir $postpath.\n";
                                                         }
                                                 }
+                                               if(($pagemode eq 'post')and !$firstpost){
+                                                       $postpath.=$post{'postid'}.'/';
+                                                       unless (-d $postpath) {
+                                                               unless (mkdir $postpath) {
+                                                                       print "Can't mkdir $postpath.\n";
+                                                               }
+                                                       }
+                                               }
+                                               
                                                 $postpath.=$post{'id'};
                                                 
                                                 if (sysopen ($postfile, $postpath, O_RDWR | O_CREAT)) {
@@ -745,6 +819,9 @@ sub processfile {
                                                 $mode = 'post';
                                         }
                                 }
+                               elsif ($tag{'<'} eq 'br') {
+                                       $post{'content'}.='<br>';
+                               }
                                 elsif ($tag{'<'} eq 'p') {
                                         $post{'content'}.='<p>';
                                 }
@@ -875,112 +952,83 @@ sub processfile {
         }
  }
  
-sub readheaderfile {
-       (my $headerpath) = @_;
-       my $headerfile;
-       my %header;
-       
-       if(ref($headerpath)) {
-               $headerfile=$headerpath;
-       }
-       else {
-               unless (open ($headerfile, "<", $headerpath)) {
-                       return %header;
-               }
-       }
-       
-       my $lastname='';
-       
-       while (defined(my $line = <$headerfile>)) {
-               $line =~ s/[\r\n]$//g;
-               # print">> $line <<\n";
-               my $headname='';
-               my $headval='';
-               
-               if($line =~ /^[ \t]+([^ \t](.*[^ \t])?)[ \t]*$/){
-                       if($lastname ne '') {
-                               $header{$lastname}.=$1;
-                       }
-               }
-               elsif ($line =~ /^([^:]+):[ \t]*([^ \t](.*[^ \t])?)[ \t]*$/) {
-                       $headname = lc($1);
-                       $headval = $2;
-                       
-                       if ($header{$headname} ne '') {
-                               $header{$headname}.=', '.$headval;
-                       }
-                       else {
-                               $header{$headname}=$headval;
-                       }
-                       $lastname = $headname;
-               }
-       }
-       
-       unless (ref($headerpath)) {
-               close ($headerfile);
-       }
-       
-       return %header;
-}
-
-# Very similar to header file reading.
+# Function to read data from datafiles.
+# Very similar to http header file reading. (function readheaderfile() in proxy
+# library)
+# 
  # Differences:
  #
-# After field name and colon there must be exactly one whitespace (space or
+# 1. After field name and colon there must be exactly one whitespace (space or
  # tab). Any other leading or trailing whitespace (but not the newline character
  # at the end of the line) is treated as part of the field value.
  #
-# When header field is split into multiple lines the next lines must start with
-# exactly one whitespace (tab or space) Any other leading or trailing whitespace
-# (but not the newline character at the end of the line) is treated as part of
-# the field value. the lines will be joined with a newline between them.
+# 2. When a header field is split into multiple lines, the next lines must start
+# with exactly one whitespace (tab or space). Any other leading or trailing
+# whitespace (but not the newline character at the end of the line) is treated
+# as part of the field value. The lines will be joined with a newline between
+# them.
+#
+# 3. When the same field name appears it replaces the previous one.
+# 
+# 4. Line separator is LF and not CR LF. The CR character is treated as part of
+# the field value.
  #
-# When the same field name appears it replaces the previous one.
+# Returns a hash containing the values.
+# Names are case insensitive: they are converted to lowercase.
+#
+# Argument can be a path or a file handle. In case of a file handle it will just
+# read the file. In case of path it opens the file before reading and closes
+# after. On failure (file not open) returns empty hash.
+# 
  sub readdatafile {
-       (my $headerpath) = @_;
-       my $headerfile;
-       my %header;
+       (my $datapath) = @_;
+       my $datafile;
+       my %data;
         
-       if(ref($headerpath)) {
-               $headerfile=$headerpath;
+       # Check whether $datapath is a path or a filehandle; filehandles are
+       # passed as references.
+       if(ref($datapath)) {
+               $datafile=$datapath;
         }
         else {
-               unless (open ($headerfile, "<", $headerpath)) {
-                       return %header;
+               unless (open ($datafile, "<", $datapath)) {
+                       return %data;
                 }
         }
-       
+
+       # The name of header field in previous line. Required for header fields that
+       # occupy multiple lines.
         my $lastname='';
         
-       while (defined(my $line = <$headerfile>)) {
-               $line =~ s/[\r\n]$//g;
-               my $headname='';
-               my $headval='';
+       while (defined(my $line = <$datafile>)) {
+               $line =~ s/[\n]$//g;
+               my $name='';
+               my $value='';
                 
+               # Line starts with whitespace. It's a continuation of the previous line.
+               # Concatenate the field value, separated by newline.
                 if($line =~ /^[ \t](.*)$/){
                         if($lastname ne '') {
-                               $header{$lastname}.="\n".$1;
+                               $data{$lastname}.="\n".$1;
                         }
                 }
+               # Line starts with a name followed by colon. Save the value
                 elsif ($line =~ /^([^:]+):[ \t](.*)$/) {
-                       $headname = lc($1);
-                       $headval = $2;
+                       $name = lc($1);
+                       $value = $2;
                         
-                       # if ($header{$headname} ne '') {
-                               # $header{$headname}.=$headval;
-                       # }
-                       # else {
-                               $header{$headname}=$headval;
-                       # }
-                       $lastname = $headname;
+                       $data{$name}=$value;
+                       
+                       $lastname = $name;
                 }
         }
         
-       unless (ref($headerpath)) {
-               close ($headerfile);
+       # If argument was a path the file must be closed. 
+       unless (ref($datapath)) {
+               close ($datafile);
         }
         
-       return %header;
+       return %data;
  }
  
  sub writedatafile {
author	b <b@7dec801f-c475-4e67-ba99-809552d69c55>
	Wed, 16 Dec 2015 20:54:14 +0000 (20:54 +0000)
committer	b <b@7dec801f-c475-4e67-ba99-809552d69c55>
	Wed, 16 Dec 2015 20:54:14 +0000 (20:54 +0000)