use Fcntl;
use File::Copy;
###PROXY_LIB;
-use proxy_lib qw(url2path urldiv2path path2urldiv getcgi divideurl joinurl readconfigfile entitydecode urldecode);
+use proxy_lib qw(url2path urldiv2path path2urldiv getcgi divideurl joinurl readconfigfile entitydecode urldecode readheaderfile);
use POSIX qw(strftime);
###ARCH_PATH;
return;
}
- $archpath = url2path('https://m.facebook.com/groups/'.$groupid).'@q/';
-
print "Group $groupid\n";
- processdir($archpath,$groupid,\%settings,\%names);
+ $archpath = url2path('https://m.facebook.com/groups/'.$groupid);
+ processdir($archpath.'@q/',\%settings,\%names,0);
+ processdir($archpath.'/@q/',\%settings,\%names,0);
+
+ $archpath = url2path('https://m.facebook.com/comment/replies');
+ processdir($archpath.'@q/',\%settings,\%names,1);
+ processdir($archpath.'/@q/',\%settings,\%names,1);
+
}
sub processdir {
- (my $dirpath, my $groupid, my $settings, my $names) = @_;
+ (my $dirpath, my $settings, my $names, my $pagemode) = @_;
my $dir;
my $subpath;
my $subpathfull;
+ print "Dir $dirpath\n";
unless ( opendir ($dir, $dirpath)) {
return;
next;
}
if (-f $subpathfull) {
- processfile($subpathfull, $groupid, $settings, $names);
+ processfile($subpathfull, $settings, $names, $pagemode);
}
elsif (-d $subpathfull) {
- processdir($subpathfull.'/', $groupid, $settings, $names);
+ processdir($subpathfull.'/', $settings, $names, $pagemode);
}
}
closedir ($dir);
}
sub processfile {
- (my $headerpath, my $groupid, my $settings, my $names) = @_;
+ (my $headerpath, my $settings, my $names, my $pagemode) = @_;
my $basepath;
my $contentpath;
my $contentfile;
my %cgi;
- my $id;
+ my $postid;
+ my $threadid;
+ my $groupid=0; ###!
my $timenumber;
my %thread;
### REDESIGN THE CONDITIONS!
if($query ne '') {
%cgi=getcgi($query);
- $id = $cgi{'id'};
- if ($id =~ /^[0-9]+$/) {
- $pagetype='thread';
+ if($pagemode) {
+ $postid = $cgi{'ctoken'};
+ if ($postid =~ /^([0-9]+)_([0-9]+)$/) {
+ $threadid = $1;
+ $postid = $2;
+ $pagetype = 'post';
+ }
+ else {
+ return;
+ }
}
- else {
- $pagetype = 'group';
+ else{
+ $threadid = $cgi{'id'};
+ if ($threadid =~ /^([0-9]+)$/) {
+ $threadid = $1;
+ $pagetype='thread';
+ }
+ else {
+ $pagetype = 'group';
+ }
}
}
else {
- $id='';
- $pagetype = 'group';
+ if($pagemode) {
+ return;
+ }
+ else {
+ $threadid='';
+ $pagetype = 'group';
+ }
}
+ print " type=$pagetype\n";
for (my $ind=0; $ind<MAX_REDIRECTIONS; ++$ind) {
%header = readheaderfile($headerpath);
}
}
- # REDESIGN THE CONDITIONS!
+ # This condition is redundant now.
if ($pagetype) {
my $hidename;
my $attnumber;
my $incomplete;
+ my $firstpost;
if ($pagetype eq 'thread') {
- print "Thread $id\n";
+ print "Thread $threadid\n";
- $thread{'id'}=$id;
- $thread{'groupid'}=$groupid;
+ $thread{'id'}=$threadid;
+ $thread{'groupid'}=$$settings{'id'};
$thread{'timenumber'}=$timenumber;
$mode = 'thread';
$level=0;
$attnumber=0;
$incomplete=0;
}
+ elsif ($pagetype eq 'post'){
+ print "Post $postid ($threadid)\n";
+
+ $mode='posts';
+ $firstpost=1;
+ }
else { #group
print "Threads\n";
$mode = 'threads';
close($contentfile);
return;
}
- while () {
+ while ($mode ne '') {
if ($closetag){
$tag{'<'} = '/'.$tag{'<'};
$tag{'/'}='/';
- $tag{"\\"}=undef;
+ delete $tag{"\\"};
$closetag=0;
}
else {
print "Thread [$1]\n";
$mode = 'thread';
%thread = ();
- $thread{'groupid'}=$groupid;
+ $thread{'groupid'}=$$settings{'id'};
$thread{'timenumber2'}=$timenumber;
$level = 0;
$attnumber=0;
%post = ();
$post{'id'} = $1;
- $post{'threadid'} = $id;
+ $post{'threadid'} = $threadid;
$post{'groupid'} = $$settings{'id'};
$post{'timenumber'} = $timenumber;
$mode = 'post';
$level=0;
$attnumber=0;
- print "Post $post{'id'}\n";
+
+ if($pagetype eq 'post') {
+ if(!$groupid) {
+ print "Can't determine if post belongs to group $$settings{'id'}.\n";
+ $mode='';
+ last;
+ }
+ elsif($post{'id'} eq $postid) {
+ $firstpost=1;
+ }
+ else {
+ $firstpost=0;
+ $post{'postid'} = $postid;
+ }
+ }
+ print "Post ".((($pagetype eq 'post') and !$firstpost)?"$post{'postid'}/":"")."$post{'id'}\n";
+ }
+ elsif (($tag{'<'} eq 'a') and ($pagetype eq 'post') and ($tag{'href'} =~ /^\/groups\/([0-9]+)\/?\?/)) {
+ if ($1 eq $$settings{'id'}) {
+ $groupid = 1;
+ }
+ else {
+ print "Post does not belong to group $$settings{'id'}.\n";
+ $mode = '';
+ last;
+ }
}
}
print "Can't mkdir $postpath.\n";
}
}
- $postpath.='post/';
+ # Replies found on a 'post' page (every post except the first one) are
+ # stored as postreply/<threadid>/<postid>/<id>; everything else is stored as
+ # post/<threadid>/<id>. The page type string is kept in $pagetype;
+ # $pagemode is only the 0/1 flag handed down by processdir(), so comparing
+ # it with 'post' would never be true.
+ if(($pagetype eq 'post') and !$firstpost){
+ $postpath.='postreply/';
+ }
+ else {
+ $postpath.='post/';
+ }
unless (-d $postpath) {
unless (mkdir $postpath) {
print "Can't mkdir $postpath.\n";
}
}
- $postpath.=$thread{'id'}.'/';
+ $postpath.=$post{'threadid'}.'/';
unless (-d $postpath) {
unless (mkdir $postpath) {
print "Can't mkdir $postpath.\n";
}
}
+ # Replies get one more directory level named after the post they answer.
+ if(($pagetype eq 'post') and !$firstpost){
+ $postpath.=$post{'postid'}.'/';
+ unless (-d $postpath) {
+ unless (mkdir $postpath) {
+ print "Can't mkdir $postpath.\n";
+ }
+ }
+ }
+
+
$postpath.=$post{'id'};
if (sysopen ($postfile, $postpath, O_RDWR | O_CREAT)) {
$mode = 'post';
}
}
+ elsif ($tag{'<'} eq 'br') {
+ $post{'content'}.='<br>';
+ }
elsif ($tag{'<'} eq 'p') {
$post{'content'}.='<p>';
}
}
}
-sub readheaderfile {
- (my $headerpath) = @_;
- my $headerfile;
- my %header;
-
- if(ref($headerpath)) {
- $headerfile=$headerpath;
- }
- else {
- unless (open ($headerfile, "<", $headerpath)) {
- return %header;
- }
- }
-
- my $lastname='';
-
- while (defined(my $line = <$headerfile>)) {
- $line =~ s/[\r\n]$//g;
- # print">> $line <<\n";
- my $headname='';
- my $headval='';
-
- if($line =~ /^[ \t]+([^ \t](.*[^ \t])?)[ \t]*$/){
- if($lastname ne '') {
- $header{$lastname}.=$1;
- }
- }
- elsif ($line =~ /^([^:]+):[ \t]*([^ \t](.*[^ \t])?)[ \t]*$/) {
- $headname = lc($1);
- $headval = $2;
-
- if ($header{$headname} ne '') {
- $header{$headname}.=', '.$headval;
- }
- else {
- $header{$headname}=$headval;
- }
- $lastname = $headname;
- }
- }
-
- unless (ref($headerpath)) {
- close ($headerfile);
- }
-
- return %header;
-}
-
-# Very similar to header file reading.
+# Function to read data from datafiles.
+# Very similar to http header file reading. (function readheaderfile() in proxy
+# library)
+#
# Differences:
#
-# After field name and colon there must be exactly one whitespace (space or
+# 1. After field name and colon there must be exactly one whitespace (space or
# tab). Any other leading or trailing whitespace (but not the newline character
# at the end of the line) is treated as part of the field value.
#
-# When header field is split into multiple lines the next lines must start with
-# exactly one whitespace (tab or space) Any other leading or trailing whitespace
-# (but not the newline character at the end of the line) is treated as part of
-# the field value. the lines will be joined with a newline between them.
+# 2. When a field is split into multiple lines the next lines must start
+# with exactly one whitespace (tab or space). Any other leading or trailing
+# whitespace (but not the newline character at the end of the line) is treated
+# as part of the field value. The lines will be joined with a newline between
+# them.
+#
+# 3. When the same field name appears it replaces the previous one.
+#
+# 4. Line separator is LF and not CR LF. The CR character is treated as part of
+# the field value.
#
-# When the same field name appears it replaces the previous one.
+# Returns a hash containing the values.
+# Field names are case insensitive: they are converted to lowercase, so the
+# keys of the returned hash are always lowercase.
+#
+# Argument can be a path or a file handle. In case of a file handle it will just
+# read the file. In case of path it opens the file before reading and closes
+# after. On failure (file not open) returns empty hash.
+#
sub readdatafile {
- (my $headerpath) = @_;
- my $headerfile;
- my %header;
+ (my $datapath) = @_;
+ my $datafile;
+ my %data;
- if(ref($headerpath)) {
- $headerfile=$headerpath;
+ # check if $datapath is actually a path or maybe a filehandle
+ # filehandles are references.
+ if(ref($datapath)) {
+ $datafile=$datapath;
}
else {
- unless (open ($headerfile, "<", $headerpath)) {
- return %header;
+ unless (open ($datafile, "<", $datapath)) {
+ return %data;
}
}
-
+
+ # The name of the field on the previous line. Required for fields that
+ # occupy multiple lines.
my $lastname='';
- while (defined(my $line = <$headerfile>)) {
- $line =~ s/[\r\n]$//g;
- my $headname='';
- my $headval='';
+ while (defined(my $line = <$datafile>)) {
+ $line =~ s/[\n]$//g;
+ my $name='';
+ my $value='';
+ # Line starts with whitespace. It's a continuation of the previous line.
+ # Concatenate the field value, separated by newline.
if($line =~ /^[ \t](.*)$/){
if($lastname ne '') {
- $header{$lastname}.="\n".$1;
+ $data{$lastname}.="\n".$1;
}
}
+ # Line starts with a name followed by colon. Save the value
elsif ($line =~ /^([^:]+):[ \t](.*)$/) {
- $headname = lc($1);
- $headval = $2;
+ $name = lc($1);
+ $value = $2;
- # if ($header{$headname} ne '') {
- # $header{$headname}.=$headval;
- # }
- # else {
- $header{$headname}=$headval;
- # }
- $lastname = $headname;
+ $data{$name}=$value;
+
+ $lastname = $name;
}
}
- unless (ref($headerpath)) {
- close ($headerfile);
+ # If argument was a path the file must be closed.
+ unless (ref($datapath)) {
+ close ($datafile);
}
- return %header;
+ return %data;
}
sub writedatafile {