bicyclesonthemoon.info Git - yplom/facebug1/commitdiff
Some more html processing
author b <b@7dec801f-c475-4e67-ba99-809552d69c55>
Thu, 3 Dec 2015 21:48:27 +0000 (21:48 +0000)
committer b <b@7dec801f-c475-4e67-ba99-809552d69c55>
Thu, 3 Dec 2015 21:48:27 +0000 (21:48 +0000)
git-svn-id: svn://botcastle1b/yplom/facebug1@3 7dec801f-c475-4e67-ba99-809552d69c55

bot.1.pl

index 10ec13153520bbba6d567cdf3ffb9b99a774ba7c..3d00706ee279eb205f87b47e4b60403c1ce97768 100644 (file)
--- a/bot.1.pl
+++ b/bot.1.pl
@@ -3,7 +3,7 @@
 use strict;
 
 ###PROXY_LIB;
-use proxy_lib qw(url2path path2urldiv getcgi divideurl readconfigfile entitydecode);
+use proxy_lib qw(url2path path2urldiv getcgi divideurl readconfigfile entitydecode urldecode);
 use POSIX qw(strftime);
 
 ###ARCH_PATH;
@@ -186,6 +186,9 @@ sub processfile {
                my $mode = 'thread';
                my $level = 0;
                my $closetag=0;
+               my $ignoretext;
+               my $link;
+               my $hidename;
                
                local $/ = '<';
                unless (defined ($text = <$contentfile>)) {
@@ -217,11 +220,14 @@ sub processfile {
                                elsif (($tag{'<'} eq 'div') and ($tag{'class'} =~ /^(bj|bk)$/)) {
                                        $mode='thread-content';
                                        $level=0;
+                                       $ignoretext=1;
+                                       $hidename=0;
+                               }
+                               elsif ($tag{'<'} eq 'abbr') {
+                                       $mode = 'thread-time';
                                }
-                               
                                
                                elsif (($tag{'<'} eq 'div') and ($tag{'id'} =~ /^ufi_/)){
-                                       print "$tag{'id'} - $id\n";
                                        $mode='posts';
                                        
                                        #!!! {
@@ -251,25 +257,57 @@ sub processfile {
                        elsif ($mode eq 'thread-content') {
                                if ($tag{'<'} eq 'div') { # There should not be any sub<div>s!
                                        ++$level;
-                                       $thread{'postcontent'}.="<br>\n<br>\n";
+                                       $thread{'postcontent'}.='<div>';
                                }
                                elsif ($tag{'<'} eq '/div') {
                                        if($level){
                                                --$level;
-                                               $thread{'postcontent'}.="<br>\n<br>\n";
+                                               $thread{'postcontent'}.='<div>';
                                        }
                                        else {
                                                $mode = 'thread';
                                        }
                                }
+                               elsif ($tag{'<'} eq 'p') {
+                                       $thread{'postcontent'}.='<p>';
+                                       $ignoretext=0;
+                               }
                                elsif ($tag{'<'} eq '/p') {
-                                       $thread{'postcontent'}.="<br>\n";
+                                       $thread{'postcontent'}.='</p>';
+                                       $ignoretext=1;
+                               }
+                               elsif (!$ignoretext) {
+                                       if ($tag{'<'} eq 'a') {
+                                               if ($tag{'href'} =~ /^https?:\/\/([a-z0-9]+\.)?facebook\.com\/l\.php\?(.*&)?u=([^&]+)(&.*)?$/) {
+                                                       $thread{'postcontent'}.='<a href="'.urldecode($3).'">';
+                                                       $link=1;
+                                               }
+                                               elsif ($tag{'href'} =~ /^\/([A-Za-z0-9\.]+)(\?.*)$/) {
+                                                       $thread{'postcontent'}.='<a href="#">'.(($$names{$1} ne '')?$$names{$1}:$$names{'default'});
+                                                       $link=1;
+                                                       $hidename=1;
+                                               }
+                                       }
+                                       elsif ($tag{'<'} eq '/a') {
+                                               if($link) {
+                                                       $thread{'postcontent'}.='</a>';
+                                                       $link=0;
+                                                       $hidename=0;
+                                               }
+                                       }
+                                       # else {
+                                       # $thread{'postcontent'}.='<<<'.$tag{'<'}.'>>>';
+                                       # }
+                               }
+                               
+                       }
+                       elsif ($mode eq 'thread-time') {
+                               if ($tag{'<'} eq '/abbr') {
+                                       $mode = 'thread';
                                }
-                               # else {
-                                       # $thread{'postcontent'}.='<'.$tag{'<'}.'>';
-                               # }
                        }
                        
+                       
                        if ($tag{"\\"} ne '') {
                                $closetag = 1;
                                next;
@@ -283,7 +321,12 @@ sub processfile {
                        $text =~ s/<$//;
                        
                        if($mode eq 'thread-content') {
-                               $thread{'postcontent'}.=$text;
+                               unless ($ignoretext or $hidename){
+                                       $thread{'postcontent'}.=$text;
+                               }
+                       }
+                       elsif ($mode eq 'thread-time') {
+                               $thread{'timetext'}.=$text;
                        }
                        # print "text: $text\n";
                }