From 61bfe2d70cae6be8c4086a210a5451135ccca9ea Mon Sep 17 00:00:00 2001
From: Magnus Auvinen
Date: Sat, 2 Aug 2008 08:21:29 +0000
Subject: added doc tool
---
docs/tool/Modules/NaturalDocs/Parser/Native.pm | 1060 ++++++++++++++++++++++++
1 file changed, 1060 insertions(+)
create mode 100644 docs/tool/Modules/NaturalDocs/Parser/Native.pm
(limited to 'docs/tool/Modules/NaturalDocs/Parser/Native.pm')
diff --git a/docs/tool/Modules/NaturalDocs/Parser/Native.pm b/docs/tool/Modules/NaturalDocs/Parser/Native.pm
new file mode 100644
index 00000000..61ba97e5
--- /dev/null
+++ b/docs/tool/Modules/NaturalDocs/Parser/Native.pm
@@ -0,0 +1,1060 @@
+###############################################################################
+#
+# Package: NaturalDocs::Parser::Native
+#
+###############################################################################
+#
+# A package that converts comments from Natural Docs' native format into objects.
+# Unlike most second-level packages, these are packages and not object classes.
+#
+###############################################################################
+
+# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
+# Natural Docs is licensed under the GPL
+
+
+use strict;
+use integer;
+
+package NaturalDocs::Parser::Native;
+
+
+###############################################################################
+# Group: Variables
+
+
+# Return values of TagType(). Not documented here.
+use constant POSSIBLE_OPENING_TAG => 1;
+use constant POSSIBLE_CLOSING_TAG => 2;
+use constant NOT_A_TAG => 3;
+
+
+#
+# var: package
+#
+# A representing the package normal topics will be a part of at the current point in the file. This is a package variable
+# because it needs to be reserved between function calls.
+#
+my $package;
+
+#
+# hash: functionListIgnoredHeadings
+#
+# An existence hash of all the headings that prevent the parser from creating function list symbols. Whenever one of
+# these headings are used in a function list topic, symbols are not created from definition lists until the next heading. The keys
+# are in all lowercase.
+#
+my %functionListIgnoredHeadings = ( 'parameters' => 1,
+ 'parameter' => 1,
+ 'params' => 1,
+ 'param' => 1,
+ 'arguments' => 1,
+ 'argument' => 1,
+ 'args' => 1,
+ 'arg' => 1 );
+
+
+###############################################################################
+# Group: Interface Functions
+
+
+#
+# Function: Start
+#
+# This will be called whenever a file is about to be parsed. It allows the package to reset its internal state.
+#
+sub Start
+ {
+ my ($self) = @_;
+ $package = undef;
+ };
+
+
+#
+# Function: IsMine
+#
+# Examines the comment and returns whether it is *definitely* Natural Docs content, i.e. it is owned by this package. Note
+# that a comment can fail this function and still be interpreted as a Natural Docs content, for example a JavaDoc-styled comment
+# that doesn't have header lines but no JavaDoc tags either.
+#
+# Parameters:
+#
+# commentLines - An arrayref of the comment lines. Must have been run through CleanComment()>.
+# isJavaDoc - Whether the comment was JavaDoc-styled.
+#
+# Returns:
+#
+# Whether the comment is *definitely* Natural Docs content.
+#
+sub IsMine #(string[] commentLines, bool isJavaDoc)
+ {
+ my ($self, $commentLines, $isJavaDoc) = @_;
+
+ # Skip to the first line with content.
+ my $line = 0;
+
+ while ($line < scalar @$commentLines && !length $commentLines->[$line])
+ { $line++; };
+
+ return $self->ParseHeaderLine($commentLines->[$line]);
+ };
+
+
+
+#
+# Function: ParseComment
+#
+# This will be called whenever a comment capable of containing Natural Docs content is found.
+#
+# Parameters:
+#
+# commentLines - An arrayref of the comment lines. Must have been run through CleanComment()>.
+# *The original memory will be changed.*
+# isJavaDoc - Whether the comment is JavaDoc styled.
+# lineNumber - The line number of the first of the comment lines.
+# parsedTopics - A reference to the array where any new should be placed.
+#
+# Returns:
+#
+# The number of parsed topics added to the array, or zero if none.
+#
+sub ParseComment #(commentLines, isJavaDoc, lineNumber, parsedTopics)
+ {
+ my ($self, $commentLines, $isJavaDoc, $lineNumber, $parsedTopics) = @_;
+
+ my $topicCount = 0;
+ my $prevLineBlank = 1;
+ my $inCodeSection = 0;
+
+ my ($type, $scope, $isPlural, $title, $symbol);
+ #my $package; # package variable.
+ my ($newKeyword, $newTitle);
+
+ my $index = 0;
+
+ my $bodyStart = 0;
+ my $bodyEnd = 0; # Not inclusive.
+
+ while ($index < scalar @$commentLines)
+ {
+ # Everything but leading whitespace was removed beforehand.
+
+ # If we're in a code section...
+ if ($inCodeSection)
+ {
+ if ($commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i)
+ { $inCodeSection = undef; };
+
+ $prevLineBlank = 0;
+ $bodyEnd++;
+ }
+
+ # If the line is empty...
+ elsif (!length($commentLines->[$index]))
+ {
+ $prevLineBlank = 1;
+
+ if ($topicCount)
+ { $bodyEnd++; };
+ }
+
+ # If the line has a recognized header and the previous line is blank...
+ elsif ($prevLineBlank && (($newKeyword, $newTitle) = $self->ParseHeaderLine($commentLines->[$index])) )
+ {
+ # Process the previous one, if any.
+
+ if ($topicCount)
+ {
+ if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END())
+ { $package = undef; };
+
+ my $body = $self->FormatBody($commentLines, $bodyStart, $bodyEnd, $type, $isPlural);
+ my $newTopic = $self->MakeParsedTopic($type, $title, $package, $body, $lineNumber + $bodyStart - 1, $isPlural);
+ push @$parsedTopics, $newTopic;
+
+ $package = $newTopic->Package();
+ };
+
+ $title = $newTitle;
+
+ my $typeInfo;
+ ($type, $typeInfo, $isPlural) = NaturalDocs::Topics->KeywordInfo($newKeyword);
+ $scope = $typeInfo->Scope();
+
+ $bodyStart = $index + 1;
+ $bodyEnd = $index + 1;
+
+ $topicCount++;
+
+ $prevLineBlank = 0;
+ }
+
+ # If we're on a non-empty, non-header line of a JavaDoc-styled comment and we haven't started a topic yet...
+ elsif ($isJavaDoc && !$topicCount)
+ {
+ $type = undef;
+ $scope = ::SCOPE_NORMAL(); # The scope repair and topic merging processes will handle if this is a class topic.
+ $isPlural = undef;
+ $title = undef;
+ $symbol = undef;
+
+ $bodyStart = $index;
+ $bodyEnd = $index + 1;
+
+ $topicCount++;
+
+ $prevLineBlank = undef;
+ }
+
+ # If we're on a normal content line within a topic
+ elsif ($topicCount)
+ {
+ $prevLineBlank = 0;
+ $bodyEnd++;
+
+ if ($commentLines->[$index] =~ /^ *\( *(?:(?:start|begin)? +)?(?:table|code|example|diagram) *\)$/i)
+ { $inCodeSection = 1; };
+ };
+
+
+ $index++;
+ };
+
+
+ # Last one, if any. This is the only one that gets the prototypes.
+ if ($bodyStart)
+ {
+ if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END())
+ { $package = undef; };
+
+ my $body = $self->FormatBody($commentLines, $bodyStart, $bodyEnd, $type, $isPlural);
+ my $newTopic = $self->MakeParsedTopic($type, $title, $package, $body, $lineNumber + $bodyStart - 1, $isPlural);
+ push @$parsedTopics, $newTopic;
+ $topicCount++;
+
+ $package = $newTopic->Package();
+ };
+
+ return $topicCount;
+ };
+
+
+#
+# Function: ParseHeaderLine
+#
+# If the passed line is a topic header, returns the array ( keyword, title ). Otherwise returns an empty array.
+#
+sub ParseHeaderLine #(line)
+ {
+ my ($self, $line) = @_;
+
+ if ($line =~ /^ *([a-z0-9 ]*[a-z0-9]): +(.*)$/i)
+ {
+ my ($keyword, $title) = ($1, $2);
+
+ # We need to do it this way because if you do "if (ND:T->KeywordInfo($keyword)" and the last element of the array it
+ # returns is false, the statement is false. That is really retarded, but there it is.
+ my ($type, undef, undef) = NaturalDocs::Topics->KeywordInfo($keyword);
+
+ if ($type)
+ { return ($keyword, $title); }
+ else
+ { return ( ); };
+ }
+ else
+ { return ( ); };
+ };
+
+
+
+###############################################################################
+# Group: Support Functions
+
+
+#
+# Function: MakeParsedTopic
+#
+# Creates a object for the passed parameters. Scope is gotten from
+# the package variable instead of from the parameters. The summary is generated from the body.
+#
+# Parameters:
+#
+# type - The . May be undef for headerless topics.
+# title - The title of the topic. May be undef for headerless topics.
+# package - The package the topic appears in.
+# body - The topic's body in .
+# lineNumber - The topic's line number.
+# isList - Whether the topic is a list.
+#
+# Returns:
+#
+# The object.
+#
+sub MakeParsedTopic #(type, title, package, body, lineNumber, isList)
+ {
+ my ($self, $type, $title, $package, $body, $lineNumber, $isList) = @_;
+
+ my $summary;
+
+ if (defined $body)
+ { $summary = NaturalDocs::Parser->GetSummaryFromBody($body); };
+
+ return NaturalDocs::Parser::ParsedTopic->New($type, $title, $package, undef, undef, $summary,
+ $body, $lineNumber, $isList);
+ };
+
+
+#
+# Function: FormatBody
+#
+# Converts the section body to .
+#
+# Parameters:
+#
+# commentLines - The arrayref of comment lines.
+# startingIndex - The starting index of the body to format.
+# endingIndex - The ending index of the body to format, *not* inclusive.
+# type - The type of the section. May be undef for headerless comments.
+# isList - Whether it's a list topic.
+#
+# Returns:
+#
+# The body formatted in .
+#
+sub FormatBody #(commentLines, startingIndex, endingIndex, type, isList)
+ {
+ my ($self, $commentLines, $startingIndex, $endingIndex, $type, $isList) = @_;
+
+ use constant TAG_NONE => 1;
+ use constant TAG_PARAGRAPH => 2;
+ use constant TAG_BULLETLIST => 3;
+ use constant TAG_DESCRIPTIONLIST => 4;
+ use constant TAG_HEADING => 5;
+ use constant TAG_PREFIXCODE => 6;
+ use constant TAG_TAGCODE => 7;
+
+ my %tagEnders = ( TAG_NONE() => '',
+ TAG_PARAGRAPH() => '
',
+ TAG_BULLETLIST() => '',
+ TAG_DESCRIPTIONLIST() => '',
+ TAG_HEADING() => '',
+ TAG_PREFIXCODE() => '',
+ TAG_TAGCODE() => '' );
+
+ my $topLevelTag = TAG_NONE;
+
+ my $output;
+ my $textBlock;
+ my $prevLineBlank = 1;
+
+ my $codeBlock;
+ my $removedCodeSpaces;
+
+ my $ignoreListSymbols;
+
+ my $index = $startingIndex;
+
+ while ($index < $endingIndex)
+ {
+ # If we're in a tagged code section...
+ if ($topLevelTag == TAG_TAGCODE)
+ {
+ if ($commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i)
+ {
+ $codeBlock =~ s/\n+$//;
+ $output .= NaturalDocs::NDMarkup->ConvertAmpChars($codeBlock) . '';
+ $codeBlock = undef;
+ $topLevelTag = TAG_NONE;
+ $prevLineBlank = undef;
+ }
+ else
+ {
+ $self->AddToCodeBlock($commentLines->[$index], \$codeBlock, \$removedCodeSpaces);
+ };
+ }
+
+ # If the line starts with a code designator...
+ elsif ($commentLines->[$index] =~ /^ *[>:|](.*)$/)
+ {
+ my $code = $1;
+
+ if ($topLevelTag == TAG_PREFIXCODE)
+ {
+ $self->AddToCodeBlock($code, \$codeBlock, \$removedCodeSpaces);
+ }
+ else # $topLevelTag != TAG_PREFIXCODE
+ {
+ if (defined $textBlock)
+ {
+ $output .= $self->RichFormatTextBlock($textBlock) . $tagEnders{$topLevelTag};
+ $textBlock = undef;
+ };
+
+ $topLevelTag = TAG_PREFIXCODE;
+ $output .= '';
+ $self->AddToCodeBlock($code, \$codeBlock, \$removedCodeSpaces);
+ };
+ }
+
+ # If we're not in either code style...
+ else
+ {
+ # Strip any leading whitespace.
+ $commentLines->[$index] =~ s/^ +//;
+
+ # If we were in a prefixed code section...
+ if ($topLevelTag == TAG_PREFIXCODE)
+ {
+ $codeBlock =~ s/\n+$//;
+ $output .= NaturalDocs::NDMarkup->ConvertAmpChars($codeBlock) . '';
+ $codeBlock = undef;
+ $topLevelTag = TAG_NONE;
+ $prevLineBlank = undef;
+ };
+
+
+ # If the line is blank...
+ if (!length($commentLines->[$index]))
+ {
+ # End a paragraph. Everything else ignores it for now.
+ if ($topLevelTag == TAG_PARAGRAPH)
+ {
+ $output .= $self->RichFormatTextBlock($textBlock) . '';
+ $textBlock = undef;
+ $topLevelTag = TAG_NONE;
+ };
+
+ $prevLineBlank = 1;
+ }
+
+ # If the line starts with a bullet...
+ elsif ($commentLines->[$index] =~ /^[-\*o+] +([^ ].*)$/ &&
+ substr($1, 0, 2) ne '- ') # Make sure "o - Something" is a definition, not a bullet.
+ {
+ my $bulletedText = $1;
+
+ if (defined $textBlock)
+ { $output .= $self->RichFormatTextBlock($textBlock); };
+
+ if ($topLevelTag == TAG_BULLETLIST)
+ {
+ $output .= '
';
+ };
+
+ return $output;
+ };
+
+
+#
+# Function: AddToCodeBlock
+#
+# Adds a line of text to a code block, handling all the indentation processing required.
+#
+# Parameters:
+#
+# line - The line of text to add.
+# codeBlockRef - A reference to the code block to add it to.
+# removedSpacesRef - A reference to a variable to hold the number of spaces removed. It needs to be stored between calls.
+# It will reset itself automatically when the code block codeBlockRef points to is undef.
+#
+sub AddToCodeBlock #(line, codeBlockRef, removedSpacesRef)
+ {
+ my ($self, $line, $codeBlockRef, $removedSpacesRef) = @_;
+
+ $line =~ /^( *)(.*)$/;
+ my ($spaces, $code) = ($1, $2);
+
+ if (!defined $$codeBlockRef)
+ {
+ if (length($code))
+ {
+ $$codeBlockRef = $code . "\n";
+ $$removedSpacesRef = length($spaces);
+ };
+ # else ignore leading line breaks.
+ }
+
+ elsif (length $code)
+ {
+ # Make sure we have the minimum amount of spaces to the left possible.
+ if (length($spaces) != $$removedSpacesRef)
+ {
+ my $spaceDifference = abs( length($spaces) - $$removedSpacesRef );
+ my $spacesToAdd = ' ' x $spaceDifference;
+
+ if (length($spaces) > $$removedSpacesRef)
+ {
+ $$codeBlockRef .= $spacesToAdd;
+ }
+ else
+ {
+ $$codeBlockRef =~ s/^(.)/$spacesToAdd . $1/gme;
+ $$removedSpacesRef = length($spaces);
+ };
+ };
+
+ $$codeBlockRef .= $code . "\n";
+ }
+
+ else # (!length $code)
+ {
+ $$codeBlockRef .= "\n";
+ };
+ };
+
+
+#
+# Function: RichFormatTextBlock
+#
+# Applies rich formatting to a chunk of text. This includes both amp chars, formatting tags, and link tags.
+#
+# Parameters:
+#
+# text - The block of text to format.
+#
+# Returns:
+#
+# The formatted text block.
+#
+sub RichFormatTextBlock #(text)
+ {
+ my ($self, $text) = @_;
+ my $output;
+
+
+ # First find bare urls, e-mail addresses, and images. We have to do this before the split because they may contain underscores
+ # or asterisks. We have to mark the tags with \x1E and \x1F so they don't get confused with angle brackets from the comment.
+ # We can't convert the amp chars beforehand because we need lookbehinds in the regexps below and they need to be
+ # constant length. Sucks, huh?
+
+ $text =~ s{
+ # The previous character can't be an alphanumeric or an opening angle bracket.
+ (?] )
+
+ }
+
+ {"\x1E" . 'email target="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '" '
+ . 'name="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '"' . "\x1F"}igxe;
+
+ $text =~ s{
+ # The previous character can't be an alphanumeric or an opening angle bracket.
+ (?] )
+
+ }
+
+ {"\x1E" . 'url target="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '" '
+ . 'name="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '"' . "\x1F"}igxe;
+
+
+ # Find image links. Inline images should already be pulled out by now.
+
+ $text =~ s{(\( *see +)([^\)]+?)( *\))}
+ {"\x1E" . 'img mode="link" target="' . NaturalDocs::NDMarkup->ConvertAmpChars($2) . '" '
+ . 'original="' . NaturalDocs::NDMarkup->ConvertAmpChars($1 . $2 . $3) . '"' . "\x1F"}gie;
+
+
+
+ # Split the text from the potential tags.
+
+ my @tempTextBlocks = split(/([\*_<>\x1E\x1F])/, $text);
+
+ # Since the symbols are considered dividers, empty strings could appear between two in a row or at the beginning/end of the
+ # array. This could seriously screw up TagType(), so we need to get rid of them.
+ my @textBlocks;
+
+ while (scalar @tempTextBlocks)
+ {
+ my $tempTextBlock = shift @tempTextBlocks;
+
+ if (length $tempTextBlock)
+ { push @textBlocks, $tempTextBlock; };
+ };
+
+
+ my $bold;
+ my $underline;
+ my $underlineHasWhitespace;
+
+ my $index = 0;
+
+ while ($index < scalar @textBlocks)
+ {
+ if ($textBlocks[$index] eq "\x1E")
+ {
+ $output .= '<';
+ $index++;
+
+ while ($textBlocks[$index] ne "\x1F")
+ {
+ $output .= $textBlocks[$index];
+ $index++;
+ };
+
+ $output .= '>';
+ }
+
+ elsif ($textBlocks[$index] eq '<' && $self->TagType(\@textBlocks, $index) == POSSIBLE_OPENING_TAG)
+ {
+ my $endingIndex = $self->ClosingTag(\@textBlocks, $index, undef);
+
+ if ($endingIndex != -1)
+ {
+ my $linkText;
+ $index++;
+
+ while ($index < $endingIndex)
+ {
+ $linkText .= $textBlocks[$index];
+ $index++;
+ };
+ # Index will be incremented again at the end of the loop.
+
+ $linkText = NaturalDocs::NDMarkup->ConvertAmpChars($linkText);
+
+ if ($linkText =~ /^(?:mailto\:)?((?:[a-z0-9\-_]+\.)*[a-z0-9\-_]+@(?:[a-z0-9\-]+\.)+[a-z]{2,4})$/i)
+ { $output .= ''; }
+ elsif ($linkText =~ /^(?:http|https|ftp|news|file)\:/i)
+ { $output .= ''; }
+ else
+ { $output .= ''; };
+ }
+
+ else # it's not a link.
+ {
+ $output .= '<';
+ };
+ }
+
+ elsif ($textBlocks[$index] eq '*')
+ {
+ my $tagType = $self->TagType(\@textBlocks, $index);
+
+ if ($tagType == POSSIBLE_OPENING_TAG && $self->ClosingTag(\@textBlocks, $index, undef) != -1)
+ {
+ # ClosingTag() makes sure tags aren't opened multiple times in a row.
+ $bold = 1;
+ $output .= '';
+ }
+ elsif ($bold && $tagType == POSSIBLE_CLOSING_TAG)
+ {
+ $bold = undef;
+ $output .= '';
+ }
+ else
+ {
+ $output .= '*';
+ };
+ }
+
+ elsif ($textBlocks[$index] eq '_')
+ {
+ my $tagType = $self->TagType(\@textBlocks, $index);
+
+ if ($tagType == POSSIBLE_OPENING_TAG && $self->ClosingTag(\@textBlocks, $index, \$underlineHasWhitespace) != -1)
+ {
+ # ClosingTag() makes sure tags aren't opened multiple times in a row.
+ $underline = 1;
+ #underlineHasWhitespace is set by ClosingTag().
+ $output .= '';
+ }
+ elsif ($underline && $tagType == POSSIBLE_CLOSING_TAG)
+ {
+ $underline = undef;
+ #underlineHasWhitespace will be reset by the next opening underline.
+ $output .= '';
+ }
+ elsif ($underline && !$underlineHasWhitespace)
+ {
+ # If there's no whitespace between underline tags, all underscores are replaced by spaces so
+ # _some_underlined_text_ becomes some underlined text. The standard _some underlined text_
+ # will work too.
+ $output .= ' ';
+ }
+ else
+ {
+ $output .= '_';
+ };
+ }
+
+ else # plain text or a > that isn't part of a link
+ {
+ $output .= NaturalDocs::NDMarkup->ConvertAmpChars($textBlocks[$index]);
+ };
+
+ $index++;
+ };
+
+ return $output;
+ };
+
+
+#
+# Function: TagType
+#
+# Returns whether the tag is a possible opening or closing tag, or neither. "Possible" because it doesn't check if an opening tag is
+# closed or a closing tag is opened, just whether the surrounding characters allow it to be a candidate for a tag. For example, in
+# "A _B" the underscore is a possible opening underline tag, but in "A_B" it is not. Support function for .
+#
+# Parameters:
+#
+# textBlocks - A reference to an array of text blocks.
+# index - The index of the tag.
+#
+# Returns:
+#
+# POSSIBLE_OPENING_TAG, POSSIBLE_CLOSING_TAG, or NOT_A_TAG.
+#
+sub TagType #(textBlocks, index)
+ {
+ my ($self, $textBlocks, $index) = @_;
+
+
+ # Possible opening tags
+
+ if ( ( $textBlocks->[$index] =~ /^[\*_<]$/ ) &&
+
+ # Before it must be whitespace, the beginning of the text, or ({["'-/*_.
+ ( $index == 0 || $textBlocks->[$index-1] =~ /[\ \t\n\(\{\[\"\'\-\/\*\_]$/ ) &&
+
+ # Notes for 2.0: Include Spanish upside down ! and ? as well as opening quotes (66) and apostrophes (6). Look into
+ # Unicode character classes as well.
+
+ # After it must be non-whitespace.
+ ( $index + 1 < scalar @$textBlocks && $textBlocks->[$index+1] !~ /^[\ \t\n]/) &&
+
+ # Make sure we don't accept <<, <=, <-, or *= as opening tags.
+ ( $textBlocks->[$index] ne '<' || $textBlocks->[$index+1] !~ /^[<=-]/ ) &&
+ ( $textBlocks->[$index] ne '*' || $textBlocks->[$index+1] !~ /^[\=\*]/ ) &&
+
+ # Make sure we don't accept * or _ before it unless it's <.
+ ( $textBlocks->[$index] eq '<' || $index == 0 || $textBlocks->[$index-1] !~ /[\*\_]$/) )
+ {
+ return POSSIBLE_OPENING_TAG;
+ }
+
+
+ # Possible closing tags
+
+ elsif ( ( $textBlocks->[$index] =~ /^[\*_>]$/) &&
+
+ # After it must be whitespace, the end of the text, or )}].,!?"';:-/*_.
+ ( $index + 1 == scalar @$textBlocks || $textBlocks->[$index+1] =~ /^[ \t\n\)\]\}\.\,\!\?\"\'\;\:\-\/\*\_]/ ||
+ # Links also get plurals, like s, es, 's, and '.
+ ( $textBlocks->[$index] eq '>' && $textBlocks->[$index+1] =~ /^(?:es|s|\')/ ) ) &&
+
+ # Notes for 2.0: Include closing quotes (99) and apostrophes (9). Look into Unicode character classes as well.
+
+ # Before it must be non-whitespace.
+ ( $index != 0 && $textBlocks->[$index-1] !~ /[ \t\n]$/ ) &&
+
+ # Make sure we don't accept >>, ->, or => as closing tags. >= is already taken care of.
+ ( $textBlocks->[$index] ne '>' || $textBlocks->[$index-1] !~ /[>=-]$/ ) &&
+
+ # Make sure we don't accept * or _ after it unless it's >.
+ ( $textBlocks->[$index] eq '>' || $textBlocks->[$index+1] !~ /[\*\_]$/) )
+ {
+ return POSSIBLE_CLOSING_TAG;
+ }
+
+ else
+ {
+ return NOT_A_TAG;
+ };
+
+ };
+
+
+#
+# Function: ClosingTag
+#
+# Returns whether a tag is closed or not, where it's closed if it is, and optionally whether there is any whitespace between the
+# tags. Support function for .
+#
+# The results of this function are in full context, meaning that if it says a tag is closed, it can be interpreted as that tag in the
+# final output. It takes into account any spoiling factors, like there being two opening tags in a row.
+#
+# Parameters:
+#
+# textBlocks - A reference to an array of text blocks.
+# index - The index of the opening tag.
+# hasWhitespaceRef - A reference to the variable that will hold whether there is whitespace between the tags or not. If
+# undef, the function will not check. If the tag is not closed, the variable will not be changed.
+#
+# Returns:
+#
+# If the tag is closed, it returns the index of the closing tag and puts whether there was whitespace between the tags in
+# hasWhitespaceRef if it was specified. If the tag is not closed, it returns -1 and doesn't touch the variable pointed to by
+# hasWhitespaceRef.
+#
+sub ClosingTag #(textBlocks, index, hasWhitespace)
+ {
+ my ($self, $textBlocks, $index, $hasWhitespaceRef) = @_;
+
+ my $hasWhitespace;
+ my $closingTag;
+
+ if ($textBlocks->[$index] eq '*' || $textBlocks->[$index] eq '_')
+ { $closingTag = $textBlocks->[$index]; }
+ elsif ($textBlocks->[$index] eq '<')
+ { $closingTag = '>'; }
+ else
+ { return -1; };
+
+ my $beginningIndex = $index;
+ $index++;
+
+ while ($index < scalar @$textBlocks)
+ {
+ if ($textBlocks->[$index] eq '<' && $self->TagType($textBlocks, $index) == POSSIBLE_OPENING_TAG)
+ {
+ # If we hit a < and we're checking whether a link is closed, it's not. The first < becomes literal and the second one
+ # becomes the new link opening.
+ if ($closingTag eq '>')
+ {
+ return -1;
+ }
+
+ # If we're not searching for the end of a link, we have to skip the link because formatting tags cannot appear within
+ # them. That's of course provided it's closed.
+ else
+ {
+ my $linkHasWhitespace;
+
+ my $endIndex = $self->ClosingTag($textBlocks, $index,
+ ($hasWhitespaceRef && !$hasWhitespace ? \$linkHasWhitespace : undef) );
+
+ if ($endIndex != -1)
+ {
+ if ($linkHasWhitespace)
+ { $hasWhitespace = 1; };
+
+ # index will be incremented again at the end of the loop, which will bring us past the link's >.
+ $index = $endIndex;
+ };
+ };
+ }
+
+ elsif ($textBlocks->[$index] eq $closingTag)
+ {
+ my $tagType = $self->TagType($textBlocks, $index);
+
+ if ($tagType == POSSIBLE_CLOSING_TAG)
+ {
+ # There needs to be something between the tags for them to count.
+ if ($index == $beginningIndex + 1)
+ { return -1; }
+ else
+ {
+ # Success!
+
+ if ($hasWhitespaceRef)
+ { $$hasWhitespaceRef = $hasWhitespace; };
+
+ return $index;
+ };
+ }
+
+ # If there are two opening tags of the same type, the first becomes literal and the next becomes part of a tag.
+ elsif ($tagType == POSSIBLE_OPENING_TAG)
+ { return -1; }
+ }
+
+ elsif ($hasWhitespaceRef && !$hasWhitespace)
+ {
+ if ($textBlocks->[$index] =~ /[ \t\n]/)
+ { $hasWhitespace = 1; };
+ };
+
+ $index++;
+ };
+
+ # Hit the end of the text blocks if we're here.
+ return -1;
+ };
+
+
+1;
--
cgit 1.4.1