From 66594db75b45dc0800fe7ef82839d8efaa4cf5ab Mon Sep 17 00:00:00 2001 From: b Date: Wed, 16 Dec 2015 20:54:14 +0000 Subject: [PATCH] readheaderfile() moved to proxy library. can process all 3 types of facebook pages now. git-svn-id: svn://botcastle1b/yplom/facebug1@8 7dec801f-c475-4e67-ba99-809552d69c55 --- bot.1.pl | 262 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 155 insertions(+), 107 deletions(-) diff --git a/bot.1.pl b/bot.1.pl index 82f50ae..de81ef3 100644 --- a/bot.1.pl +++ b/bot.1.pl @@ -4,7 +4,7 @@ use strict; use Fcntl; use File::Copy; ###PROXY_LIB; -use proxy_lib qw(url2path urldiv2path path2urldiv getcgi divideurl joinurl readconfigfile entitydecode urldecode); +use proxy_lib qw(url2path urldiv2path path2urldiv getcgi divideurl joinurl readconfigfile entitydecode urldecode readheaderfile); use POSIX qw(strftime); ###ARCH_PATH; @@ -64,18 +64,24 @@ sub processgroup { return; } - $archpath = url2path('https://m.facebook.com/groups/'.$groupid).'@q/'; - print "Group $groupid\n"; - processdir($archpath,$groupid,\%settings,\%names); + $archpath = url2path('https://m.facebook.com/groups/'.$groupid); + processdir($archpath.'@q/',\%settings,\%names,0); + processdir($archpath.'/@q/',\%settings,\%names,0); + + $archpath = url2path('https://m.facebook.com/comment/replies'); + processdir($archpath.'@q/',\%settings,\%names,1); + processdir($archpath.'/@q/',\%settings,\%names,1); + } sub processdir { - (my $dirpath, my $groupid, my $settings, my $names) = @_; + (my $dirpath, my $settings, my $names, my $pagemode) = @_; my $dir; my $subpath; my $subpathfull; + print "Dir $dirpath\n"; unless ( opendir ($dir, $dirpath)) { return; @@ -86,17 +92,17 @@ sub processdir { next; } if (-f $subpathfull) { - processfile($subpathfull, $groupid, $settings, $names); + processfile($subpathfull, $settings, $names, $pagemode); } elsif (-d $subpathfull) { - processdir($subpathfull.'/', $groupid, $settings, $names); + processdir($subpathfull.'/', $settings, $names, $pagemode); } } closedir ($dir); } sub processfile { - (my $headerpath, my $groupid, my $settings, my $names) = @_; + (my $headerpath, my $settings, my $names, my $pagemode) = @_; my $basepath; my $contentpath; my $contentfile; @@ -111,7 +117,9 @@ sub processfile { my %cgi; - my $id; + my $postid; + my $threadid; + my $groupid=0; ###! my $timenumber; my %thread; @@ -137,18 +145,38 @@ sub processfile { ### REDESIGN THE CONDITIONS! if($query ne '') { %cgi=getcgi($query); - $id = $cgi{'id'}; - if ($id =~ /^[0-9]+$/) { - $pagetype='thread'; + if($pagemode) { + $postid = $cgi{'ctoken'}; + if ($postid =~ /^([0-9]+)_([0-9]+)$/) { + $threadid = $1; + $postid = $2; + $pagetype = 'post'; + } + else { + return; + } } - else { - $pagetype = 'group'; + else{ + $threadid = $cgi{'id'}; + if ($threadid =~ /^([0-9]+)$/) { + $threadid = $1; + $pagetype='thread'; + } + else { + $pagetype = 'group'; + } } } else { - $id=''; - $pagetype = 'group'; + if($pagemode) { + return; + } + else { + $threadid=''; + $pagetype = 'group'; + } } + print " type=$pagetype\n"; for (my $ind=0; $ind)) { - $line =~ s/[\r\n]$//g; - # print">> $line <<\n"; - my $headname=''; - my $headval=''; - - if($line =~ /^[ \t]+([^ \t](.*[^ \t])?)[ \t]*$/){ - if($lastname ne '') { - $header{$lastname}.=$1; - } - } - elsif ($line =~ /^([^:]+):[ \t]*([^ \t](.*[^ \t])?)[ \t]*$/) { - $headname = lc($1); - $headval = $2; - - if ($header{$headname} ne '') { - $header{$headname}.=', '.$headval; - } - else { - $header{$headname}=$headval; - } - $lastname = $headname; - } - } - - unless (ref($headerpath)) { - close ($headerfile); - } - - return %header; -} - -# Very similar to header file reading. +# Function to read data from datafiles. +# Very similar to http header file reading. (function readheaderfile() in proxy +# library) +# # Differences: # -# After field name and colon there must be exactly one whitespace (space or +# 1. After field name and colon there must be exactly one whitespace (space or # tab). Any other leading or trailing whitespace (but not the newline character # at the end of the line) is treated as part of the field value. # -# When header field is split into multiple lines the next lines must start with -# exactly one whitespace (tab or space) Any other leading or trailing whitespace -# (but not the newline character at the end of the line) is treated as part of -# the field value. the lines will be joined with a newline between them. +# 2. When header field is split into multiple lines the next lines must start +# with exactly one whitespace (tab or space) Any other leading or trailing +# whitespace (but not the newline character at the end of the line) is treated +# as part of the field value. the lines will be joined with a newline between +# them. +# +# 3. When the same field name appears it replaces the previous one. +# +# 4. Line separator is LF and not CR LF. The CR character is treated as part of +# the field value. # -# When the same field name appears it replaces the previous one. +# Returns a hash containing the values. +# Names are case sensitive and are converted to lowercase +# +# Argument can be a path or a file handle. In case of a file handle it will just +# read the file. In case of path it opens the file before reading and closes +# after. On failure (file not open) returns empty hash. +# sub readdatafile { - (my $headerpath) = @_; - my $headerfile; - my %header; + (my $datapath) = @_; + my $datafile; + my %data; - if(ref($headerpath)) { - $headerfile=$headerpath; + # check if $datapath is actually a path or maybe a filehandle + # filehandles are references. + if(ref($datapath)) { + $datafile=$datapath; } else { - unless (open ($headerfile, "<", $headerpath)) { - return %header; + unless (open ($datafile, "<", $datapath)) { + return %data; } } - + + # The name of header field in previous line. Required for header fields that + # occupy multiple lines. my $lastname=''; - while (defined(my $line = <$headerfile>)) { - $line =~ s/[\r\n]$//g; - my $headname=''; - my $headval=''; + while (defined(my $line = <$datafile>)) { + $line =~ s/[\n]$//g; + my $name=''; + my $value=''; + # Line starts with whitespace. It's a continuation of the previous line. + # Concatenate the field value, separated by newline. if($line =~ /^[ \t](.*)$/){ if($lastname ne '') { - $header{$lastname}.="\n".$1; + $data{$lastname}.="\n".$1; } } + # Line starts with a name followed by colon. Save the value elsif ($line =~ /^([^:]+):[ \t](.*)$/) { - $headname = lc($1); - $headval = $2; + $name = lc($1); + $value = $2; - # if ($header{$headname} ne '') { - # $header{$headname}.=$headval; - # } - # else { - $header{$headname}=$headval; - # } - $lastname = $headname; + $data{$name}=$value; + + $lastname = $name; } } - unless (ref($headerpath)) { - close ($headerfile); + # If argument was a path the file must be closed. + unless (ref($datapath)) { + close ($datafile); } - return %header; + return %data; } sub writedatafile { -- 2.30.2