my %thread;
my %thread2;
+ my %post;
+ my %post2;
+
if ($headerpath =~ /^((.+)\@h)$/) {
$headerpath = $1;
$basepath = $2;
my %tag;
my $mode = 'thread';
my $level = 0;
+ my $level2;
my $closetag=0;
my $ignoretext;
my $link;
$level=0;
$ignoretext=1;
$hidename=0;
+ $link=0;
}
elsif ($tag{'<'} eq 'abbr') {
$mode = 'thread-time';
$mode='posts';
my $threadfile;
- my $threadpath = ARCH_PATH.$$settings{'id'}.'/'.'thread/'.$thread{'id'};
+ my $threadpath = ARCH_PATH.$$settings{'id'}.'/';
+ unless (-d $threadpath) {
+ unless (mkdir $threadpath) {
+ print "Can't mkdir $threadpath.\n";
+ }
+ }
+ $threadpath.='thread/';
+ unless (-d $threadpath) {
+ unless (mkdir $threadpath) {
+ print "Can't mkdir $threadpath.\n";
+ }
+ }
+ $threadpath.=$thread{'id'};
# if (open ($threadfile, "+<", $threadpath)) {
if (sysopen ($threadfile, $threadpath, O_RDWR | O_CREAT)) {
if (flock ($threadfile, 2)) {
- # In the future it might be necessary to use a different function.
- %thread2 = readheaderfile($threadfile);
+ %thread2 = readdatafile($threadfile);
if (($thread2{'timenumber'} ne '')and($thread2{'timenumber'}>$thread{'timenumber'})) {
print ("Newer version already saved.\n\n");
}
else {
+ foreach my $ind (keys %thread2) {
+ if($ind =~ /^((img(key)?)|(link(text|title)?))-[0-9]+$/) {
+ delete $thread2{$ind};
+ }
+ }
foreach my $ind (keys %thread) {
$thread2{$ind}=$thread{$ind};
}
}
if (seek($threadfile, 0, 0)) {
+ writedatafile($threadfile,%thread2);
+ truncate ($threadfile , tell($threadfile));
+
foreach my $ind (keys %thread2) {
- print $threadfile "$ind: $thread2{$ind}\n";
print "$ind: $thread2{$ind}\n"; ####
}
- print $threadfile "\n";
- truncate ($threadfile , tell($threadfile));
print "saved.\n\n";
}
else {
- print "Failed seek $threadfile.\n\n";
+ print "Can't seek $threadfile.\n\n";
}
}
}
}
}
}
+
elsif ($mode eq 'thread-author') {
if ($tag{'<'} eq 'a') {
if ($tag{'href'} =~ /^\/([A-Za-z0-9\.]+)(\?.*)?$/) {
$mode='thread';
}
}
+
elsif ($mode eq 'thread-content') {
if ($tag{'<'} eq 'div') { # There should not be any sub<div>s!
++$level;
elsif ($tag{'<'} eq '/div') {
if($level){
--$level;
- $thread{'postcontent'}.='<div>';
+ $thread{'postcontent'}.='</div>';
}
else {
$mode = 'thread';
}
}
+
elsif ($mode eq 'thread-time') {
if ($tag{'<'} eq '/abbr') {
$mode = 'thread';
elsif ($tag{'<'} eq 'a') {
if ($tag{'href'} =~ /^\/photo\.php\?(.*&)?fbid=([0-9]+)(&.*)?$/) {
++$attnumber;
- $thread{'img-'.$attnumber}=$2;
+ $thread{'img-'.$attnumber}='a_'.$2;
$mode = 'thread-attachment-img';
}
elsif ($tag{'href'} =~ /^https?:\/\/([a-z0-9\.\-]+)?facebook\.com\/l\.php\?(.*&)?u=([^&]+)(&.*)?$/) {
$thread{'imgkey-'.$attnumber}=$imgkey;
}
else {
- $thread{'img-'.$attnumber}=undef;
+ delete $thread{'img-'.$attnumber};
--$attnumber;
}
}
}
elsif (($tag{'<'} eq 'img')and($tag{'src'} =~ /^https?:\/\/([a-z0-9\.\-]+)?fbcdn\.net\/safe_image\.php\?(.*&)?url=([^&]+)(&.*)?$/)) {
my $imgurl = urldecode($3);
- my $imgid='';
+ my $imgid='i_';
$imgurl =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg;
while(length($imgurl)>240) {
$mode = 'thread-attachment-link';
}
}
+
+
+ elsif ($mode eq 'posts') {
+ if (($tag{'<'} eq 'div') and ($tag{'id'} =~ /^([0-9]+)$/)) {
+ %post = ();
+
+ $post{'id'} = $1;
+ $post{'threadid'} = $id;
+ $post{'groupid'} = $$settings{'id'};
+ $post{'timenumber'} = $timenumber;
+
+ $mode = 'post';
+ $level=0;
+ $attnumber=0;
+ print "Post $post{'id'}\n";
+ }
+ }
+
+ elsif ($mode eq 'post') {
+ if ($tag {'<'} eq 'h3') {
+ $mode = 'post-author';
+ }
+ elsif ($tag{'<'} eq 'abbr') {
+ $mode = 'post-time';
+ }
+ if ($tag{'<'} eq 'div') {
+ if(($tag{'class'} eq '')and($post{'content'} eq '')) {
+ $mode = 'post-content';
+ $level2=0;
+ $ignoretext=0;
+ $hidename=0;
+ $link=0;
+ }
+ else {
+ ++$level;
+ }
+ }
+ elsif ($tag{'<'} eq '/div') {
+ if ($level) {
+ --$level;
+ }
+ else {
+ $mode = 'posts';
+
+ my $postfile;
+ my $postpath = ARCH_PATH.$$settings{'id'}.'/';
+ unless (-d $postpath) {
+ unless (mkdir $postpath) {
+ print "Can't mkdir $postpath.\n";
+ }
+ }
+ $postpath.='post/';
+ unless (-d $postpath) {
+ unless (mkdir $postpath) {
+ print "Can't mkdir $postpath.\n";
+ }
+ }
+ $postpath.=$thread{'id'}.'/';
+ unless (-d $postpath) {
+ unless (mkdir $postpath) {
+ print "Can't mkdir $postpath.\n";
+ }
+ }
+ $postpath.=$post{'id'};
+
+ if (sysopen ($postfile, $postpath, O_RDWR | O_CREAT)) {
+ if (flock ($postfile, 2)) {
+ %post2 = readdatafile($postfile);
+
+ if (($post2{'timenumber'} ne '')and($post2{'timenumber'}>$post{'timenumber'})) {
+ print ("Newer version already saved.\n\n");
+ }
+ else {
+ foreach my $ind (keys %post2) {
+ if($ind =~ /^img(key)?-[0-9]+$/) {
+ delete $post2{$ind};
+ }
+ }
+ foreach my $ind (keys %post) {
+ $post2{$ind}=$post{$ind};
+ }
+ if ($post2{'key'} eq '') {
+ $post2{'key'} = key(KEY_BITS);
+ }
+ if (seek($postfile, 0, 0)) {
+ writedatafile($postfile,%post2);
+ truncate ($postfile , tell($postfile));
+
+ foreach my $ind (keys %post2) {
+ print "$ind: $post2{$ind}\n";
+ }
+ print "saved.\n\n";
+ }
+ else {
+ print "Can't seek $postfile.\n\n";
+ }
+ }
+ }
+ else {
+ print "Can't lock $postfile.\n\n";
+ }
+ close ($postfile);
+ }
+ else
+ {
+ print "Can't open $postpath.\n\n";
+ }
+ }
+ }
+
+ elsif ($tag{'<'} eq 'a') {
+ if ($tag{'href'} =~ /^\/photo\.php\?(.*&)?fbid=([0-9]+)(&.*)?$/) {
+ ++$attnumber;
+ $post{'img-'.$attnumber}='a_'.$2;
+ $mode = 'post-img';
+ }
+ elsif ($tag{'href'} =~ /^\/comment\/replies\/?\?/) {
+ $mode = 'post-replies';
+ }
+ }
+ }
+
+ elsif ($mode eq 'post-img') {
+ if ($tag{'<'} eq 'img') {
+ my $imgkey = saveimg($tag{'src'},$post{'img-'.$attnumber},$$settings{'id'});
+ if ($imgkey ne '') {
+ $post{'imgkey-'.$attnumber}=$imgkey;
+ }
+ else {
+ delete $post{'img-'.$attnumber};
+ --$attnumber;
+ }
+ }
+ elsif ($tag{'<'} eq '/a') {
+ $mode = 'post';
+ }
+ }
+
+
+ elsif ($mode eq 'post-author') {
+ if ($tag{'<'} eq 'a') {
+ if ($tag{'href'} =~ /^\/([A-Za-z0-9\.]+)(\?.*)?$/) {
+ my $author = $1;
+ if ($post{'author'} eq '') {
+ $post{'author'} = $author;
+ $post{'name'} = ($$names{$author} ne '')?$$names{$author}:$$names{'default'};
+ }
+ }
+ }
+ elsif ($tag{'<'} eq '/h3') {
+ $mode='post';
+ }
+ }
+
+ elsif ($mode eq 'post-content') {
+ if ($tag{'<'} eq 'div') { # There should not be any sub<div>s!
+ ++$level2;
+ $post{'content'}.='<div>';
+ }
+ elsif ($tag{'<'} eq '/div') {
+ if($level2){
+ --$level2;
+ $post{'content'}.='</div>';
+ }
+ else {
+ $mode = 'post';
+ }
+ }
+ elsif ($tag{'<'} eq 'p') {
+ $post{'content'}.='<p>';
+ }
+ elsif ($tag{'<'} eq '/p') {
+ $post{'content'}.='</p>';
+ }
+ elsif (!$ignoretext) {
+ if (($tag{'<'} eq 'img')and($tag{'src'} =~ /^https?:\/\/([a-z0-9\.\-]+)?fbcdn\.net\/rsrc\.php\/(.*)$/)) {
+ my $imgurl = urldecode($2);
+ my $imgid = 'r_';
+ $imgurl =~ s/([^A-Za-z0-9_\.])/sprintf ("@%02X",ord($1))/eg;
+
+ while(length($imgurl)>240) {
+ $imgid.=substr($imgurl,0,120).'-/';
+ $imgurl=substr($imgurl,120);
+ }
+ $imgid.=$imgurl;
+
+ my $imgkey = saveimg($tag{'src'},$imgid,$$settings{'id'});
+ if ($imgkey ne '') {
+ $post{'content'}.='<img src="&img@i'.$imgid.'@k'.$imgkey.';" alt="'.$tag{'alt'}.'">';
+ }
+ else {
+ $post{'content'}.='<img src="" alt="'.$tag{'alt'}.'">';
+ }
+
+ }
+ elsif ($tag{'<'} eq 'a') {
+ if ($tag{'href'} =~ /^https?:\/\/([a-z0-9\.\-]+)?facebook\.com\/l\.php\?(.*&)?u=([^&]+)(&.*)?$/) {
+ $post{'content'}.='<a href="'.urldecode($3).'">';
+ $link=1;
+ }
+ elsif ($tag{'href'} =~ /^\/([A-Za-z0-9\.]+)(\?.*)$/) {
+ $post{'content'}.='<a href="#">'.(($$names{$1} ne '')?$$names{$1}:$$names{'default'});
+ $link=1;
+ $hidename=1;
+ }
+ }
+ elsif ($tag{'<'} eq '/a') {
+ if($link) {
+ $post{'content'}.='</a>';
+ $link=0;
+ $hidename=0;
+ }
+ }
+ else {
+ $post{'content'}.='<!'.$tag{'<'}.'!>';
+ }
+ }
+
+ }
+
+ elsif ($mode eq 'post-time') {
+ if ($tag{'<'} eq '/abbr') {
+ $mode = 'post';
+ }
+ }
+
+ elsif ($mode eq 'post-replies') {
+ if ($tag{'<'} eq '/a') {
+ $mode = 'post';
+ }
+ }
+
+
if ($tag{"\\"} ne '') {
$closetag = 1;
next;
}
-
local $/ = '<';
unless (defined ($text = <$contentfile>)) {
close($contentfile);
$thread{'linktext-'.$attnumber}.=$text;
}
- # print "text: $text\n";
+ if($mode eq 'post-content') {
+ unless ($ignoretext or $hidename){
+ $post{'content'}.=$text;
+ }
+ }
+ elsif ($mode eq 'post-time') {
+ $post{'timetext'}.=$text;
+ }
+ elsif ($mode eq 'post-replies') {
+ if($text =~ /^[ \t\r\n]*([0-9]+)[ \t\r\n]+repl(y|ies)/) {
+ $post{'replies'} = $1;
+ }
+ }
+
}
- # while (defined ($line = <$contentfile>)) {
- # if ($firstline) {
- # $firstline = 0;
- # next;
- # }
- # # print "$line\n";
- # }
close ($contentfile);
}
my $headname='';
my $headval='';
- if($line =~ /^[ \t]+([^ \t].*)$/){
+ if($line =~ /^[ \t]+([^ \t](.*[^ \t])?)[ \t]*$/){
if($lastname ne '') {
$header{$lastname}.=$1;
}
}
- elsif ($line =~ /^([^:]*):[ \t]*([^ \t](.*[^ \t])?)[ \t]*$/) {
+ elsif ($line =~ /^([^:]+):[ \t]*([^ \t](.*[^ \t])?)[ \t]*$/) {
$headname = lc($1);
$headval = $2;
return %header;
}
+# Very similar to header file reading.
+# Differences:
+#
+# After field name and colon there must be exactly one whitespace (space or
+# tab). Any other leading or trailing whitespace (but not the newline character
+# at the end of the line) is treated as part of the field value.
+#
+# When header field is split into multiple lines the next lines must start with
+# exactly one whitespace (tab or space) Any other leading or trailing whitespace
+# (but not the newline character at the end of the line) is treated as part of
+# the field value. the lines will be joined with a newline between them.
+#
+# When the same field name appears it replaces the previous one.
+sub readdatafile {
+ (my $headerpath) = @_;
+ my $headerfile;
+ my %header;
+
+ if(ref($headerpath)) {
+ $headerfile=$headerpath;
+ }
+ else {
+ unless (open ($headerfile, "<", $headerpath)) {
+ return %header;
+ }
+ }
+
+ my $lastname='';
+
+ while (defined(my $line = <$headerfile>)) {
+ $line =~ s/[\r\n]$//g;
+ my $headname='';
+ my $headval='';
+
+ if($line =~ /^[ \t](.*)$/){
+ if($lastname ne '') {
+ $header{$lastname}.="\n".$1;
+ }
+ }
+ elsif ($line =~ /^([^:]+):[ \t](.*)$/) {
+ $headname = lc($1);
+ $headval = $2;
+
+ # if ($header{$headname} ne '') {
+ # $header{$headname}.=$headval;
+ # }
+ # else {
+ $header{$headname}=$headval;
+ # }
+ $lastname = $headname;
+ }
+ }
+
+ unless (ref($headerpath)) {
+ close ($headerfile);
+ }
+
+ return %header;
+}
+
+sub writedatafile {
+ (my $headerpath, my %header) = @_;
+ my $headerfile;
+
+ if(ref($headerpath)) {
+ $headerfile=$headerpath;
+ }
+ else {
+ unless (open ($headerfile, ">", $headerpath)) {
+ return 0;
+ }
+ }
+
+ foreach my $ind (keys %header) {
+ my $headname = $ind;
+ my $headval = $header{$ind};
+ $headval =~ s/\r//g;
+ $headval =~ s/\n/\n /g;
+ print $headerfile "$headname: $headval\n";
+ }
+ print $headerfile "\n";
+
+ unless (ref($headerpath)) {
+ close ($headerfile);
+ }
+
+ return 1;
+}
+
sub gettimenumber {
(my $date) = @_;
my $year;
(my $prot, my $host, my $port, my $path, my $query);
my @stat;
- print " Image $id\n";
+ print "Image $id\n";
+
+ $basepath=ARCH_PATH.$groupid.'/';
+ unless (-d $basepath){
+ unless (mkdir $basepath) {
+ print "Can't mkdir $basepath.\n";
+ return '';
+ }
+ }
+ $basepath.='image/';
+ unless (-d $basepath){
+ unless (mkdir $basepath) {
+ print "Can't mkdir $basepath.\n";
+ return '';
+ }
+ }
+
- $basepath=ARCH_PATH.$groupid.'/image/';
my $idtemp = $id;
while((my $ind = index($idtemp,'/'))>=0) {
$basepath.=substr($idtemp, 0, $ind+1);
$idtemp=substr($idtemp,$ind+1);
unless (-d $basepath){
unless (mkdir $basepath) {
- print " Can't mkdir $basepath.\n";
+ print "Can't mkdir $basepath.\n";
return '';
}
}
if (flock ($headfile, 2)) {
%header = readheaderfile($headfile);
if ($header{'key'} ne '') {
- print " Already saved.\n\n";
+ print "Already saved.\n\n";
close($headfile);
return $header{'key'};
}
}
}
else {
- print " Can't lock $headpath.\n\n";
+ print "Can't lock $headpath.\n\n";
close($headfile);
return '';
}
$archbasepath = urldiv2path($prot, $host, $port, $path, $query);
$archheadpath = $archbasepath.'@h';
$archimgpath = $archbasepath.'@v';
- print " url: $url\n";
+ print "url: $url\n";
for (my $ind=0; $ind<MAX_REDIRECTIONS; ++$ind) {
%archheader = readheaderfile($archheadpath);
if ($location !~ /^[a-z]+:\/\//) {
$location = $prot.'://'.$host.(($port ne '')?(':'.$port):'').$location;
}
- print " Redirect: $location\n";
+ print "Redirect: $location\n";
($prot, $host, $port, $path, $query) = divideurl($location);
$archbasepath = urldiv2path($prot, $host, $port, $path, $query);
$archheadpath = $archbasepath.'@h';
}
}
if ($archheader{'status'} !~ /^200 /) {
- print " Not found ($url $archheader{'status'}).\n\n";
+ print "Not found ($archheader{'status'}).\n\n";
if ($headopen) {
close ($headfile);
}
$header{'content-length'}=$stat[7];
}
else {
- print " Can't stat $imgpath. $stat[7]\n";
+ print "Can't stat $imgpath. $stat[7]\n";
}
$header{'id'}=$id;
$header{'groupid'}=$groupid;
if ($headopen) {
unless (seek($headfile,0,0)) {
- print " Can't seek $headpath.\n\n";
+ print "Can't seek $headpath.\n\n";
close ($headfile);
return '';
}
}
else{
unless (open($headfile,">",$headpath)) {
- print " Can't open $headpath.\n\n";
+ print "Can't open $headpath.\n\n";
return '';
}
unless (flock ($headfile, 2)) {
- print " Can't lock $headpath.\n\n";
+ print "Can't lock $headpath.\n\n";
close($headfile);
return '';
}
foreach my $ind (keys %header) {
print $headfile "$ind: $header{$ind}\n";
- print " $ind: $header{$ind}\n";
+ print "$ind: $header{$ind}\n";
}
print $headfile "\n";
close ($headfile);
unless (copy($archimgpath,$imgpath)) {
- print " Can't copy $archimgpath.\n\n";
+ print "Can't copy $archimgpath.\n\n";
return '';
}
print "saved.\n\n";