From 66eb4166251ef553f25e73329d99fab054eb4fdf Mon Sep 17 00:00:00 2001 From: b Date: Sat, 19 Aug 2023 14:47:59 +0000 Subject: [PATCH] continue bbcode conversion --- botm-common | 2 +- post_common.1.pm | 448 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 408 insertions(+), 42 deletions(-) diff --git a/botm-common b/botm-common index 70a5470..bdf2577 160000 --- a/botm-common +++ b/botm-common @@ -1 +1 @@ -Subproject commit 70a54705d7c930699fc7678b3b03a6f934e74397 +Subproject commit bdf2577b82d59fed0450c2a8fc61a46efe5d19eb diff --git a/post_common.1.pm b/post_common.1.pm index b173e58..2db88fb 100644 --- a/post_common.1.pm +++ b/post_common.1.pm @@ -36,7 +36,8 @@ our @EXPORT_OK = ( ###PERL_LIB: use lib '/botm/lib/post'; use botm_common ( 'system_encoded', - 'write_postdata_file' + 'write_postdata_file', + 'html_entity_encode_dec' ); ###PERL_DEFAULT_PASSWORD: use constant DEFAULT_PASSWORD => 'password'; @@ -218,55 +219,56 @@ sub prepare_post { # initialise bbtree with top level sub bbtree_init { - (my $bbtree, my $base_ind, my $print) = @_; + (my $bbtree, my $print) = @_; %$bbtree = (); - $bbtree{$base_ind.'.name'} = $base_ind; - $bbtree{$base_ind.'.type'} = 'tag'; - $bbtree{$base_ind.'.count'} = 0; - - if ($print){ - print bbtree_debug(%bbtree, $ind); - } - - return ($base_ind, 0); + return bbtree_add_tag( + $bbtree, + -1, # level + '' , # ind + '_', # name + '', # value + '', # text + $print + ); } # add new tag to bbtree, and enter new tag sub bbtree_add_tag { - (my $bbtree, my $ind, my $level, my $name, my $value, my $text, my $print) = @_; + (my $bbtree, my $level, my $ind, my $name, my $value, my $text, my $print) = @_; my $ind_count = $ind.'.count'; # index for NEW tag - my $new_ind = $ind.'.'.$bbtree->{$ind_count}; + my $new_ind = $ind.'.'.int($bbtree->{$ind_count}); # increase count of CURRENT tag $bbtree->{$ind_count} += 1; # save NEW tag properties - $bbtree->{$new_ind.'.type' } = 'tag'; - $bbtree->{$new_ind.'.name' } = $name; - $bbtree->{$new_ind.'.value' } = $value; + $bbtree->{$new_ind.'.type' } = 'tag'; + $bbtree->{$new_ind.'.name' } = $name; + $bbtree->{$new_ind.'.value'} = $value; $bbtree->{$new_ind.'.text' } = $text; # NEW tag starts empty and open - $bbtree->{$new_ind.'.count' } = 0; - $bbtree->{$new_ind.'.closed'} = 0; + $bbtree->{$new_ind.'.count' } = 0; + $bbtree->{$new_ind.'.closed' } = 0; + $bbtree->{$new_ind.'.endtext'} = ''; if ($print) { print bbtree_debug(%bbtree, $new_ind); } # enter NEW tag - return ($new_ind, $level+1); + return ($level+1, $new_ind); } # add new text to bbtree, don't enter sub bbtree_add_text { - (my $bbtree, my $ind, my $level, my $text, my $print) = @_; + (my $bbtree, my $level, my $ind, my $text, my $print) = @_; my $ind_count = $ind.'.count'; - my $count = $bbtree->{$ind_count}; + my $count = int($bbtree->{$ind_count}); # if last element of CURRENT tag is text, # then merge NEW text instead of adding separately @@ -293,12 +295,12 @@ sub bbtree_add_text { if ($print){ print bbtree_debug(%bbtree, $ind); } - return ($ind, $level); + return ($level, $ind); } # close existing tag in bbtree, return to parent tag sub bbtree_close_tag { - (my $bbtree, my $ind, my $level, my $text, my $print) = @_; + (my $bbtree, my $level, my $ind, my $text, my $print) = @_; $bbtree->{$ind.'endtext'} = $text; # mark CURRENT tag as closed @@ -311,16 +313,16 @@ sub bbtree_close_tag { # return to PARENT tag if ($level > 0) { $ind =~ s/\.[0-9]+$//; - return ($ind, $level-1); + return ($level-1, $ind); } else { - return ($ind, $level); + return ($level, $ind); } } # don't close existing tag in bbtree, return to parent tag sub bbtree_drop_tag { - (my $bbtree, my $ind, my $level, my $text, my $print) = @_; + (my $bbtree, my $level, my $ind, my $text, my $print) = @_; $bbtree->{$ind.'endtext'} = $text; # mark CURRENT tag as NOT closed @@ -333,10 +335,10 @@ sub bbtree_drop_tag { # return to PARENT tag if ($level > 0) { $ind =~ s/\.[0-9]+$//; - return ($ind, $level-1); + return ($level-1, $ind); } else { - return ($ind, $level); + return ($level, $ind); } } @@ -391,18 +393,101 @@ sub bbtree_debug { return $debug; } +# goto toplevel of bbtree +sub bbtree_start { + (my $bbtree, my $print) = @_; + + my $ind = '.0', + my $sub_ind = '', + my $level = 0; + + if ($print) { + print bbtree_info($bbtree, $ind); + } + return ($level, $ind, $sub_ind); +} + +# goto next element of bbtree +sub bbtree_next { + (my $bbtree, my $level, my $ind, my $sub_ind, my $print) = @_; + + # we're at tag start + if ($sub_ind eq '') { + # go to first element + $sub_ind = 0; + } + else { + # go to next element + $sub_ind += 1; + # ran out of elements (no need for `while` here) + if ($sub_ind > $bbtree->{$ind.'.count'}) { + # go to next element 1 level below + $level -= 1; + $ind =~ m/^(.*)\.([0-9]+$)/; + $sub_ind = $2 + 1; + $ind = $1; + } + } + + my $full_ind = $ind.'.'.$sub_ind; + if ($print) { + print '['.$full_ind."]\n"; + } + # next element is a tag; enter it + if ($bbtree->{'.type'} eq 'tag') { + $ind = $full_ind; + $sub_ind = ''; + } + + if ($print) { + print bbtree_info($bbtree, $full_ind); + } + + return ($level, $ind, $sub_ind); +} + +# full info about bbtree element +sub bbtree_info { + (my $bbtree, my $ind) = @_; + + my $info = "[$ind] "; + + if ($bbtree->{$ind.'.type'} eq 'tag') { + $info .= "tag:\n"; + $info .= 'text ='.$bbtree->{$ind.'.text'}."\n"; + $info .= 'endtext ='.$bbtree->{$ind.'.endtext'}."\n"; + $info .= 'name ='.$bbtree->{$ind.'.name'}."\n"; + $info .= 'value ='.$bbtree->{$ind.'.value'}."\n"; + $info .= 'count ='.$bbtree->{$ind.'.count'}."\n"; + $info .= 'closed ='.$bbtree->{$ind.'.closed'}."\n"; + } + elsif ($bbtree->{$ind.'.type'} eq 'text') { + $info .= 'text='.$bbtree->{$ind.'.text'}."\n"; + } + else { + $info .= "???\n"; + } + + return $info; +} + sub bbcode_to_html { - (my $cmd_options, my $post) = @_; + (my $cmd_options, my $post, my $bbcode) = @_; - my $bbcode = $post->{'content'}; # BBcode text to convert + if ($bbcode eq '') { + $bbcode = $post->{'content'}; + } my $html = ''; my $level; # how deep in the BB tree my $ind; # current BB tree element's index + my $sub_ind; # current BBcode tag element's index my %bbtree = (); # BB tree structure my $tag; # current tag, full my $tag_end; # current tag's closing mark my $tag_name; # current tag's name my $tag_value; # current tag's (optional) value + my $tag_closed; + my $tag_count; my $text; # current text my $in_code = 0; # if (and how deep) in [code] tag my $in_spoiler = 0; # if (and how deep) in [spoiler] tag @@ -413,7 +498,7 @@ sub bbcode_to_html { if ($print) { print "Build BBcode tree:\n"; } - ($ind, $level) = bbtree_init(\%bbtree, '_', $print); + ($ind, $level) = bbtree_init(\%bbtree, $print); while ($bbcode ne '') { @@ -448,7 +533,7 @@ sub bbcode_to_html { print 'text: '.$text."\n"; } # add new text to BB tree - bbtree_add_text(\%bbtree, $ind, $level, $text, $print); + bbtree_add_text(\%bbtree, $level, $ind, $text, $print); } # no tag, skip @@ -465,7 +550,7 @@ sub bbcode_to_html { print 'invalid tag name "'.$tag_name."\"\n"; } # add bad tag to BB tree as text - bbtree_add_text(\%bbtree, $ind, $level, $tag, $print); + bbtree_add_text(\%bbtree, $level, $ind, $tag, $print); # skip next; } @@ -487,7 +572,7 @@ sub bbcode_to_html { print "this tag forbidden here\n"; } # add bad tag as text - bbtree_add_text(\%bbtree, $ind, $level, $tag, $print); + bbtree_add_text(\%bbtree, $level, $ind, $tag, $print); # skip next; } @@ -501,7 +586,7 @@ sub bbcode_to_html { print "implicit [/*]\n"; } # add [/*] to BB tree - ($ind, $level) = bbtree_close_tag(\%bbtree, $ind, $level, '[/*]', $print); + ($level, $ind) = bbtree_close_tag(\%bbtree, $level, $ind, '[/*]', $print); } # [*] not in [list], forbidden elsif ($bbtree{$ind.'.name'} ne 'list') { @@ -509,14 +594,14 @@ sub bbcode_to_html { print "this tag forbidden here\n"; } # add tag to BB tree as text - bbtree_add_text(\%bbtree, $ind, $level, $tag, $print); + bbtree_add_text(\%bbtree, $level, $ind, $tag, $print); # skip next; } } # add tag to BB tree and enter - ($ind, $level) = bbtree_add_tag(\%bbtree, $ind, $level, $tag_name, $tag_value, $tag, $print); + ($level, $ind) = bbtree_add_tag(\%bbtree, $level, $ind, $tag_name, $tag_value, $tag, $print); # keep track of special case tags if ($tag_name eq 'code') { $in_code += 1; @@ -537,7 +622,7 @@ sub bbcode_to_html { print "unmatched\n"; } # add bad tag to BB tree as text - bbtree_add_text(\%bbtree, $ind, $level, $tag, $print); + bbtree_add_text(\%bbtree, $level, $ind, $tag, $print); # skip next; } @@ -549,7 +634,7 @@ sub bbcode_to_html { print "implicit [/*]\n"; } # add [/*] to BB tree - ($ind, $level) = bbtree_close_tag(\%bbtree, $ind, $level, '[/*]' $print); + ($level, $ind) = bbtree_close_tag(\%bbtree, $level, $ind, '[/*]' $print); } # handle special case tags @@ -569,17 +654,298 @@ sub bbcode_to_html { print "mismatched\n"; } # add bad tag to BB tree as text and close current tag - ($ind, $level) = bbtree_drop_tag(\%bbtree, $ind, $level, $tag, $print); + ($level, $ind) = bbtree_drop_tag(\%bbtree, $level, $ind, $tag, $print); } # close current tag - ($ind, $level) = bbtree_close_tag(\%bbtree, $ind, $level, $tag, $print); + ($level, $ind) = bbtree_close_tag(\%bbtree, $level, $ind, $tag, $print); } } + $in_code = 0; + if ($print) { print "\nGenerate HTML from BBcode tree:\n"; } + ($level, $ind, $sub_ind) = bbtree_start(\%bbtree, $print); + + while ($level >= 0) { + $tag = $bbtree{$ind.'.text'}; + $tag_end = $bbtree{$ind.'.endtext'}; + $tag_name = $bbtree{$ind.'.name'}; + $tag_value = $bbtree{$ind.'.value'}; + $tag_count = $bbtree{$ind.'.count'}; + $tag_closed = $bbtree{$ind.'.closed'}; + + # tag start + if ($sub_ind eq '') { + + if (not $tag_closed) { + if ($print) { + print "unmatched tag\n"; + } + $html .= html_entity_encode_dec($tag); + } + + elsif ($tag_name eq 'quote') { + if ($tag_value eq '') { + $html .= '
'; + #$html .= '
'; + } + else { + if ($tag_value =~ m/^"(.*)"$/gs) { + $tag_value = bbcode_to_html($cmd_options, $post, $1); + } + else { + $tag_value = 'QUOTE MARKS MISSING; '.html_entity_encode_dec($tag_value) + } + $html .= '
'.tagvalue.' wrote:'; + # $html .= '
'.tagvalue.' wrote:
'; + } + } + + elsif ($tag_name eq 'b') { + $html .= ''; + if ($tag_value ne '') { + $html .= 'INVALID VALUE; '; + } + } + + elsif ($tag_name eq 'i') { + $html .= ''; + if ($tag_value ne '') { + $html .= 'INVALID VALUE; '; + } + } + + elsif ($tag_name eq 'u') { + $html .= ''; + if ($tag_value ne '') { + $html .= 'INVALID VALUE; '; + } + } + + elsif ($tag_name eq 'code') { + if ($in_code > 0) { + if ($print) { + print "not a tag, actually\n"; + } + $html .= html_entity_encode_dec($tag); + } + else { + $html .= '
Code: Select all
'; + # $html .= '
Code: Select all
'; + } + if ($tag_value ne '') { + $html .= 'INVALID VALUE; '; + } + $in_code += 1; + } + + elsif ($tag_name eq 'img') { + if ($tag_value ne '') { + $html .= 'INVALID VALUE; '; + } + $html .= ''; + } + + elsif ($tag_name eq 'size') { + if (($tag_value =~ m/^([0-9]+)$/gs) and (int($tag_value) <= 200)) { + $html .= ''; + } + else { + $html .= ' INVALID VALUE; '; + } + } + + elsif ($tag_name eq 'color') { + if ($tag_value =~ m/^(#[0-9A-F]{6})$/gs) { + $html .= ''; + } + else { + $html .= ' INVALID VALUE; '; + } + } + + elsif ($tag_name eq 'center') { + $html .= '
'; + if ($tag_value ne '') { + $html .= 'INVALID VALUE; '; + } + } + + elsif ($tag_name eq 'right') { + $html .= '
'; + if ($tag_value ne '') { + $html .= 'INVALID VALUE; '; + } + } + + elsif ($tag_name eq 's') { + $html .= ''; + if ($tag_value ne '') { + $html .= 'INVALID VALUE; '; + } + } + + elsif ($tag_name eq 'sub') { + $html .= ''; + if ($tag_value ne '') { + $html .= 'INVALID VALUE; '; + } + } + + elsif ($tag_name eq 'sup') { + $html .= ''; + if ($tag_value ne '') { + $html .= 'INVALID VALUE; '; + } + } + + elsif ($tag_name eq 'spoiler') { + $html .= '
Spoiler:
'; + # $html .= '
Spoiler:
'; + if ($tag_value ne '') { + $html .= 'INVALID VALUE; '; + } + } + + elsif ($tag_name eq 'list') { + if ($tag_value eq '') { + $html .= '
    '; + } + else { + if ($tag_value eq '1') { + $tag_value = 'decimal'; + } + elsif ($tag_value eq 'i') { + $tag_value = 'lower-roman'; + } + elsif ($tag_value eq 'I') { + $tag_value = 'upper-roman'; + } + elsif ($tag_value eq 'a') { + $tag_value = 'lower-alpha'; + } + elsif ($tag_value eq 'A') { + $tag_value = 'upper-alpha'; + } + else { + $html .= 'INVALID VALUE; '; + $tag_value = 'decimal'; + } + $html .= '
      '; + } + } + + elsif ($tag_name eq '*') { + $html .= '
    1. '; + if ($tag_value ne '') { + $html .= 'INVALID VALUE; '; + } + } + + else { + if ($print) { + print "unknown tag\n" + } + $html .= html_entity_encode_dec($tag); + } + } + + # tag end + elsif ($sub_ind >= $tag_count) { + + if (not $tag_closed) { + if ($print) { + print "unmatched tag\n"; + } + $html .= html_entity_encode_dec($tag_end); + } + + elsif ($tag_name eq 'quote') { + $html .= '
'; + } + + elsif ($tag_name =~ m/^(b|i|u|(size)|(color))$/gs) { + $html .= ''; + } + + elsif ($tag_name eq 'code') { + $in_code -= 1; + if ($in_code > 0) { + if ($print) { + print "not a tag, actually\n"; + } + $html .= html_entity_encode_dec($tag_end); + } + else { + $html .= ''; + } + } + + elsif ($tag_name eq 'url') { + $html .= ''; + } + + elsif ($tag_name =~ /^((center)|(right))$/) { + $html .= '
'; + } + + elsif ($tag_name eq 's') { + $html .= ''; + } + + elsif ($tag_name eq 'sub') { + $html .= ''; + } + + elsif ($tag_name eq 'sup') { + $html .= ''; + } + + elsif ($tag_name eq 'spoiler') { + $html .= '
'; + } + + elsif ($tag_name eq 'list') { + if ($tag_value eq '') { + $html .= ''; + } + else { + $html .= ''; + } + } + + elsif ($tag_name eq '*') { + $html .= ''; + } + + else { + if ($print) { + print "unknown tag\n"; + } + $html .= html_entity_encode_dec($tag_end); + } + } + + # text + else { + # TODO + } + + ($level, $ind, $sub_ind) = bbtree_next(\%bbtree, $level, $ind, $sub_ind); + } } -- 2.30.2