added doc tool

author: Magnus Auvinen <magnus.auvinen@gmail.com> 2008-08-02 08:21:29 +0000
committer: Magnus Auvinen <magnus.auvinen@gmail.com> 2008-08-02 08:21:29 +0000
commit: 61bfe2d70cae6be8c4086a210a5451135ccca9ea (patch)
tree: 62bf7808b1b2bfe5f56fe1e329871fb0991d0687 /docs/tool/Modules/NaturalDocs/Parser.pm
parent: a13b94f9e0bca8ea892311d9d9e0c0bc48616ea7 (diff)
download: zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.tar.gz
zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.zip
1 files changed, 1331 insertions, 0 deletions
diff --git a/docs/tool/Modules/NaturalDocs/Parser.pm b/docs/tool/Modules/NaturalDocs/Parser.pm
new file mode 100644
index 00000000..e88cd289
--- /dev/null
+++ b/docs/tool/Modules/NaturalDocs/Parser.pm
@@ -0,0 +1,1331 @@
+###############################################################################
+#
+#   Package: NaturalDocs::Parser
+#
+###############################################################################
+#
+#   A package that coordinates source file parsing between the <NaturalDocs::Languages::Base>-derived objects and its own
+#   sub-packages such as <NaturalDocs::Parser::Native>.  Also handles sending symbols to <NaturalDocs::SymbolTable> and
+#   other generic topic processing.
+#
+#   Usage and Dependencies:
+#
+#       - Prior to use, <NaturalDocs::Settings>, <NaturalDocs::Languages>, <NaturalDocs::Project>, <NaturalDocs::SymbolTable>,
+#         and <NaturalDocs::ClassHierarchy> must be initialized.  <NaturalDocs::SymbolTable> and <NaturalDocs::ClassHierarchy>
+#         do not have to be fully resolved.
+#
+#       - Aside from that, the package is ready to use right away.  It does not have its own initialization function.
+#
+###############################################################################
+
+# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
+# Natural Docs is licensed under the GPL
+
+use NaturalDocs::Parser::ParsedTopic;
+use NaturalDocs::Parser::Native;
+use NaturalDocs::Parser::JavaDoc;
+
+use strict;
+use integer;
+
+package NaturalDocs::Parser;
+
+
+
+###############################################################################
+# Group: Variables
+
+
+#
+#   var: sourceFile
+#
+#   The source <FileName> currently being parsed.  Set by <ParseForInformation()> and <ParseForBuild()>.
+#
+my $sourceFile;
+
+#
+#   var: language
+#
+#   The language object for the file, derived from <NaturalDocs::Languages::Base>.  Assigned in <Parse()>.
+#
+my $language;
+
+#
+#   Array: parsedFile
+#
+#   An array of <NaturalDocs::Parser::ParsedTopic> objects for the current file.  Emptied at the start of <Parse()>.
+#
+my @parsedFile;
+
+
+#
+#   bool: parsingForInformation
+#   Set to 1 by <ParseForInformation()> and to undef by <ParseForBuild()>, so it tells which of the two is running.
+#
+my $parsingForInformation;
+
+
+
+###############################################################################
+# Group: Functions
+
+#
+#   Function: ParseForInformation
+#
+#   Parses the input file for information.  Will update the information about the file in <NaturalDocs::SymbolTable> and
+#   <NaturalDocs::Project>.
+#
+#   Parameters:
+#
+#       file - The <FileName> to parse.
+#
+sub ParseForInformation #(file)
+    {
+    my ($self, $file) = @_;
+    $sourceFile = $file;
+    # Mark this as the information pass; <OnClass()> and <OnClassParent()> only record anything while this is set.
+    $parsingForInformation = 1;
+
+    # Watch this parse so we detect any changes.
+    NaturalDocs::SymbolTable->WatchFileForChanges($sourceFile);
+    NaturalDocs::ClassHierarchy->WatchFileForChanges($sourceFile);
+    NaturalDocs::SourceDB->WatchFileForChanges($sourceFile);
+
+    my $defaultMenuTitle = $self->Parse();
+    # <Parse()> has rebuilt @parsedFile.  Register the symbols, references and images of every topic in it.
+    foreach my $topic (@parsedFile)
+        {
+        # Add a symbol for the topic.  Enumeration topics are registered with ::TOPIC_TYPE() as their type.
+
+        my $type = $topic->Type();
+        if ($type eq ::TOPIC_ENUMERATION())
+            {  $type = ::TOPIC_TYPE();  };
+
+        NaturalDocs::SymbolTable->AddSymbol($topic->Symbol(), $sourceFile, $type,
+                                                                   $topic->Prototype(), $topic->Summary());
+
+
+        # You can't put the function call directly in a while with a regex.  It has to sit in a variable to work.
+        my $body = $topic->Body();
+
+
+        # If it's a list or enum topic, add a symbol for each description list entry.
+
+        if ($topic->IsList() || $topic->Type() eq ::TOPIC_ENUMERATION())
+            {
+            # We'll hijack the enum constants to apply to non-enum behavior too.
+            my $behavior;
+
+            if ($topic->Type() eq ::TOPIC_ENUMERATION())
+                {
+                $type = ::TOPIC_CONSTANT();
+                $behavior = $language->EnumValues();
+                }
+            elsif (NaturalDocs::Topics->TypeInfo($topic->Type())->Scope() == ::SCOPE_ALWAYS_GLOBAL())
+                {
+                $behavior = ::ENUM_GLOBAL();
+                }
+            else
+                {
+                $behavior = ::ENUM_UNDER_PARENT();
+                };
+            # Each <ds>...</ds><dd>...</dd> pair in the body is one entry: the symbol text, then its description.
+            while ($body =~ /<ds>([^<]+)<\/ds><dd>(.*?)<\/dd>/g)
+                {
+                my ($listTextSymbol, $listSummary) = ($1, $2);
+
+                $listTextSymbol = NaturalDocs::NDMarkup->RestoreAmpChars($listTextSymbol);
+                my $listSymbol = NaturalDocs::SymbolString->FromText($listTextSymbol);
+                # Qualify the entry's symbol according to the behavior picked above.  Any other behavior leaves it as is.
+                if ($behavior == ::ENUM_UNDER_PARENT())
+                    {  $listSymbol = NaturalDocs::SymbolString->Join($topic->Package(), $listSymbol);  }
+                elsif ($behavior == ::ENUM_UNDER_TYPE())
+                    {  $listSymbol = NaturalDocs::SymbolString->Join($topic->Symbol(), $listSymbol);  };
+
+                NaturalDocs::SymbolTable->AddSymbol($listSymbol, $sourceFile, $type, undef,
+                                                                           $self->GetSummaryFromDescriptionList($listSummary));
+                };
+            };
+
+
+        # Add references in the topic.
+
+        while ($body =~ /<link target=\"([^\"]*)\" name=\"[^\"]*\" original=\"[^\"]*\">/g)
+            {
+            my $linkText = NaturalDocs::NDMarkup->RestoreAmpChars($1);
+            my $linkSymbol = NaturalDocs::SymbolString->FromText($linkText);
+
+            NaturalDocs::SymbolTable->AddReference(::REFERENCE_TEXT(), $linkSymbol,
+                                                                           $topic->Package(), $topic->Using(), $sourceFile);
+            };
+
+
+        # Add images in the topic.
+
+        while ($body =~ /<img mode=\"[^\"]*\" target=\"([^\"]+)\" original=\"[^\"]*\">/g)
+            {
+            my $target = NaturalDocs::NDMarkup->RestoreAmpChars($1);
+            NaturalDocs::ImageReferenceTable->AddReference($sourceFile, $target);
+            };
+        };
+
+    # Handle any changes to the file.
+    NaturalDocs::ClassHierarchy->AnalyzeChanges();
+    NaturalDocs::SymbolTable->AnalyzeChanges();
+    NaturalDocs::SourceDB->AnalyzeWatchedFileChanges();
+
+    # Update project on the file's characteristics.  The menu title is only stored for files that produced topics.
+    my $hasContent = (scalar @parsedFile > 0);
+
+    NaturalDocs::Project->SetHasContent($sourceFile, $hasContent);
+    if ($hasContent)
+        {  NaturalDocs::Project->SetDefaultMenuTitle($sourceFile, $defaultMenuTitle);  };
+
+    # We don't need to keep this around.
+    @parsedFile = ( );
+    };
+
+
+#
+#   Function: ParseForBuild
+#
+#   Parses the input file for building, returning it as a <NaturalDocs::Parser::ParsedTopic> arrayref.
+#
+#   Note that all new and changed files should be parsed for symbols via <ParseForInformation()> before calling this function on
+#   *any* file.  The reason is that <NaturalDocs::SymbolTable> needs to know about all the symbol definitions and references to
+#   resolve them properly.
+#
+#   Parameters:
+#
+#       file - The <FileName> to parse for building.
+#
+#   Returns:
+#
+#       An arrayref of the source file as <NaturalDocs::Parser::ParsedTopic> objects.
+#
+sub ParseForBuild #(file)
+    {
+    my ($self, $file) = @_;
+
+    # Record the file and clear the information-pass flag in one go.  With the flag cleared, <OnClass()> and
+    # <OnClassParent()> leave <NaturalDocs::ClassHierarchy> alone during this parse.
+    ($sourceFile, $parsingForInformation) = ($file, undef);
+
+    $self->Parse();
+    return \@parsedFile;
+    };
+
+
+
+
+###############################################################################
+# Group: Interface Functions
+
+
+#
+#   Function: OnComment
+#
+#   The function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a comment
+#   suitable for documentation.
+#
+#   Parameters:
+#
+#       commentLines - An arrayref of the comment's lines.  The language's comment symbols should be converted to spaces,
+#                               and there should be no line break characters at the end of each line.  *The original memory will be
+#                               changed.*
+#       lineNumber - The line number of the first of the comment lines.
+#       isJavaDoc - Whether the comment is in JavaDoc format.
+#
+#   Returns:
+#
+#       The number of topics created by this comment, or zero if none.
+#
+sub OnComment #(string[] commentLines, int lineNumber, bool isJavaDoc)
+    {
+    my ($self, $commentLines, $lineNumber, $isJavaDoc) = @_;
+
+    $self->CleanComment($commentLines);
+
+    # We check if it's definitely Natural Docs content first.  This overrides all else, since it's possible that a comment could start
+    # with a topic line yet have something that looks like a JavaDoc tag.  Natural Docs wins in this case.
+    if (NaturalDocs::Parser::Native->IsMine($commentLines, $isJavaDoc))
+        {  return NaturalDocs::Parser::Native->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile);  }
+    elsif (NaturalDocs::Parser::JavaDoc->IsMine($commentLines, $isJavaDoc))
+        {  return NaturalDocs::Parser::JavaDoc->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile);  }
+    # If the content is ambiguous and it's a JavaDoc-styled comment, treat it as Natural Docs content.
+    elsif ($isJavaDoc)
+        {  return NaturalDocs::Parser::Native->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile);  }
+    # Nobody claimed the comment.  Report zero topics explicitly instead of leaking the value of the last condition tested.
+    return 0;
+    };
+
+
+#
+#   Function: OnClass
+#
+#   A function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a class declaration.
+#
+#   Parameters:
+#
+#       class - The <SymbolString> of the class encountered.
+#
+sub OnClass #(class)
+    {
+    my ($self, $class) = @_;
+
+    # Only the information pass feeds <NaturalDocs::ClassHierarchy>.  On a build pass this callback does nothing.
+    NaturalDocs::ClassHierarchy->AddClass($sourceFile, $class)  if ($parsingForInformation);
+    };
+
+
+#
+#   Function: OnClassParent
+#
+#   A function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a declaration of
+#   inheritance.
+#
+#   Parameters:
+#
+#       class - The <SymbolString> of the class we're in.
+#       parent - The <SymbolString> of the class it inherits.
+#       scope - The package <SymbolString> that the reference appeared in.
+#       using - An arrayref of package <SymbolStrings> that the reference has access to via "using" statements.
+#       resolvingFlags - Any <Resolving Flags> to be used when resolving the reference.  <RESOLVE_NOPLURAL> is added
+#                              automatically since that would never apply to source code.
+#
+sub OnClassParent #(class, parent, scope, using, resolvingFlags)
+    {
+    my ($self, $class, $parent, $scope, $using, $resolvingFlags) = @_;
+
+    # Inheritance is only recorded on the information pass.  <RESOLVE_NOPLURAL> is OR'd into whatever flags the caller
+    # supplied before the reference is handed to <NaturalDocs::ClassHierarchy>.
+    NaturalDocs::ClassHierarchy->AddParentReference($sourceFile, $class, $parent, $scope, $using,
+                                                    $resolvingFlags | ::RESOLVE_NOPLURAL())
+        if ($parsingForInformation);
+    };
+
+
+
+###############################################################################
+# Group: Support Functions
+
+
+#   Function: Parse
+#
+#   Opens the source file and runs the parsing process.  Most of the actual parsing is done in <NaturalDocs::Languages::Base->ParseFile()>
+#   and <OnComment()>, though.
+#
+#   *Do not call externally.*  Rather, call <ParseForInformation()> or <ParseForBuild()>.
+#
+#   Returns:
+#
+#       The default menu title of the file.  Will be the <FileName> if nothing better is found.
+#
+sub Parse
+    {
+    my ($self) = @_;
+
+    NaturalDocs::Error->OnStartParsing($sourceFile);
+
+    # Reset the per-file state before handing the file to its language object.
+    $language = NaturalDocs::Languages->LanguageOf($sourceFile);
+    NaturalDocs::Parser::Native->Start();
+    @parsedFile = ( );
+
+    # The language object gets a reference to @parsedFile.  Both of its return values are tested with defined before
+    # they are used below.
+    my ($autoTopics, $scopeRecord) = $language->ParseFile($sourceFile, \@parsedFile);
+    my $hasAutoTopics = defined $autoTopics;
+
+
+    # Post-process the topics.
+
+    $self->AddToClassHierarchy();
+    $self->BreakLists();
+
+    if ($hasAutoTopics)
+        {
+        # Packages have to be repaired before the auto-topics are merged in.
+        $self->RepairPackages($autoTopics, $scopeRecord)  if (defined $scopeRecord);
+        $self->MergeAutoTopics($language, $autoTopics);
+        };
+
+    $self->RemoveRemainingHeaderlessTopics();
+
+    # Package delineators are only needed when there are auto-topics.  Without them the only package changes would be
+    # the ones implied by the comments.
+    $self->AddPackageDelineators()  if ($hasAutoTopics);
+
+    $self->MakeAutoGroups($autoTopics)  unless (NaturalDocs::Settings->NoAutoGroup());
+
+
+    # Work out the menu title.  It stays the file name unless the first topic supplies one.
+
+    my $defaultMenuTitle = $sourceFile;
+
+    if (scalar @parsedFile)
+        {
+        my $firstTopic = $parsedFile[0];
+        my $useTopicTitle;
+
+        if (NaturalDocs::Settings->OnlyFileTitles())
+            {
+            # Even then the title is taken from the topics if the first one is a file topic.
+            $useTopicTitle = ($firstTopic->Type() eq ::TOPIC_FILE());
+            }
+        else
+            {
+            $useTopicTitle = ( scalar @parsedFile == 1 ||
+                               NaturalDocs::Topics->TypeInfo( $firstTopic->Type() )->PageTitleIfFirst() );
+            };
+
+        if ($useTopicTitle)
+            {  $defaultMenuTitle = $firstTopic->Title();  }
+        else
+            {
+            # The title ended up being the file name, so add a leading file topic for it.
+
+            unshift @parsedFile,
+                       NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FILE(), (NaturalDocs::File->SplitPath($sourceFile))[2],
+                                                             undef, undef, undef, undef, undef, 1, undef);
+            };
+        };
+
+
+    NaturalDocs::Error->OnEndParsing($sourceFile);
+
+    return $defaultMenuTitle;
+    };
+
+
+#
+#   Function: CleanComment
+#
+#   Removes any extraneous formatting and whitespace from the comment.  Eliminates comment boxes, horizontal lines, trailing
+#   whitespace from lines, and expands all tab characters.  It keeps leading whitespace, though, since it may be needed for
+#   example code, and blank lines, since the original line numbers are needed.
+#
+#   Parameters:
+#
+#       commentLines  - An arrayref of the comment lines to clean.  *The original memory will be changed.*  Lines should have the
+#                                language's comment symbols replaced by spaces and not have a trailing line break.
+#
+sub CleanComment #(commentLines)
+    {
+    my ($self, $commentLines) = @_;
+
+    use constant DONT_KNOW => 0;
+    use constant IS_UNIFORM => 1;
+    use constant IS_UNIFORM_IF_AT_END => 2;
+    use constant IS_NOT_UNIFORM => 3;
+
+    my $leftSide = DONT_KNOW;
+    my $rightSide = DONT_KNOW;
+    my $leftSideChar;
+    my $rightSideChar;
+
+    my $index = 0;
+    my $tabLength = NaturalDocs::Settings->TabLength();
+
+    while ($index < scalar @$commentLines)
+        {
+        # Strip trailing whitespace from the original.
+
+        $commentLines->[$index] =~ s/[ \t]+$//;
+
+
+        # Expand tabs in the original.  This method is almost six times faster than Text::Tabs' method.
+
+        my $tabIndex = index($commentLines->[$index], "\t");
+
+        while ($tabIndex != -1)
+            {
+            substr( $commentLines->[$index], $tabIndex, 1, ' ' x ($tabLength - ($tabIndex % $tabLength)) );
+            $tabIndex = index($commentLines->[$index], "\t", $tabIndex);
+            };
+
+
+        # Make a working copy and strip leading whitespace as well.  This has to be done after tabs are expanded because
+        # stripping indentation could change how far tabs are expanded.
+
+        my $line = $commentLines->[$index];
+        $line =~ s/^ +//;
+
+        # If the line is blank...
+        if (!length $line)
+            {
+            # If we have a potential vertical line, this is only acceptable if it's at the end of the comment.
+            if ($leftSide == IS_UNIFORM)
+                {  $leftSide = IS_UNIFORM_IF_AT_END;  };
+            if ($rightSide == IS_UNIFORM)
+                {  $rightSide = IS_UNIFORM_IF_AT_END;  };
+            }
+
+        # If there's at least four symbols in a row, it's a horizontal line.  The second regex supports differing edge characters.  It
+        # doesn't matter if any of this matches the left and right side symbols.  The length < 256 is a sanity check, because that
+        # regexp has caused the perl regexp engine to choke on an insane line someone sent me from an automatically generated
+        # file.  It had over 10k characters on the first line, and most of them were 0x00.
+        elsif ($line =~ /^([^a-zA-Z0-9 ])\1{3,}$/ ||
+                (length $line < 256 && $line =~ /^([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$/) )
+            {
+            # Ignore it.  This has no effect on the vertical line detection.  We want to keep it in the output though in case it was
+            # in a code section.
+            }
+
+        # If the line is not blank or a horizontal line...
+        else
+            {
+            # More content means any previous blank lines are no longer tolerated in vertical line detection.  They are only
+            # acceptable at the end of the comment.
+
+            if ($leftSide == IS_UNIFORM_IF_AT_END)
+                {  $leftSide = IS_NOT_UNIFORM;  };
+            if ($rightSide == IS_UNIFORM_IF_AT_END)
+                {  $rightSide = IS_NOT_UNIFORM;  };
+
+
+            # Detect vertical lines.  Lines are only lines if they are followed by whitespace or a connected horizontal line.
+            # Otherwise we may accidentally detect lines from short comments that just happen to have every first or last
+            # character the same.
+
+            if ($leftSide != IS_NOT_UNIFORM)
+                {
+                if ($line =~ /^([^a-zA-Z0-9])\1*(?: |$)/)
+                    {
+                    if ($leftSide == DONT_KNOW)
+                        {
+                        $leftSide = IS_UNIFORM;
+                        $leftSideChar = $1;
+                        }
+                    else # ($leftSide == IS_UNIFORM)  Other choices already ruled out.
+                        {
+                        if ($leftSideChar ne $1)
+                            {  $leftSide = IS_NOT_UNIFORM;  };
+                        };
+                    }
+                # We'll tolerate the lack of symbols on the left on the first line, because it may be a
+                # /* Function: Whatever
+                #  * Description.
+                #  */
+                # comment which would have the leading /* blanked out.
+                elsif ($index != 0)
+                    {
+                    $leftSide = IS_NOT_UNIFORM;
+                    };
+                };
+
+            if ($rightSide != IS_NOT_UNIFORM)
+                {
+                if ($line =~ / ([^a-zA-Z0-9])\1*$/)
+                    {
+                    if ($rightSide == DONT_KNOW)
+                        {
+                        $rightSide = IS_UNIFORM;
+                        $rightSideChar = $1;
+                        }
+                    else # ($rightSide == IS_UNIFORM)  Other choices already ruled out.
+                        {
+                        if ($rightSideChar ne $1)
+                            {  $rightSide = IS_NOT_UNIFORM;  };
+                        };
+                    }
+                else
+                    {
+                    $rightSide = IS_NOT_UNIFORM;
+                    };
+                };
+
+            # We'll remove vertical lines later if they're uniform throughout the entire comment.
+            };
+
+        $index++;
+        };
+
+
+    if ($leftSide == IS_UNIFORM_IF_AT_END)
+        {  $leftSide = IS_UNIFORM;  };
+    if ($rightSide == IS_UNIFORM_IF_AT_END)
+        {  $rightSide = IS_UNIFORM;  };
+
+
+    $index = 0;
+    my $inCodeSection = 0;
+
+    while ($index < scalar @$commentLines)
+        {
+        # Clear horizontal lines only if we're not in a code section.
+        if ($commentLines->[$index] =~ /^ *([^a-zA-Z0-9 ])\1{3,}$/ ||
+            ( length $commentLines->[$index] < 256 &&
+              $commentLines->[$index] =~ /^ *([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$/ ) )
+            {
+            if (!$inCodeSection)
+                {  $commentLines->[$index] = '';  }
+            }
+
+        else
+            {
+            # Clear vertical lines.
+
+            if ($leftSide == IS_UNIFORM)
+                {
+                # This works because every line should either start this way, be blank, or be the first line that doesn't start with a
+                # symbol.
+                $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1*//;
+                };
+
+            if ($rightSide == IS_UNIFORM)
+                {
+                $commentLines->[$index] =~ s/ *([^a-zA-Z0-9 ])\1*$//;
+                };
+
+
+            # Clear horizontal lines again if there were vertical lines.  This catches lines that were separated from the verticals by
+            # whitespace.  The second pattern gets the same length < 256 sanity check as everywhere else it is used.
+
+            if (($leftSide == IS_UNIFORM || $rightSide == IS_UNIFORM) && !$inCodeSection)
+                {
+                $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1{3,}$//;
+                if (length $commentLines->[$index] < 256)
+                    {  $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$//;  };
+                };
+
+            # Check for the start and end of code sections.  Note that this doesn't affect vertical line removal.
+
+            if (!$inCodeSection &&
+                $commentLines->[$index] =~ /^ *\( *(?:(?:start|begin)? +)?(?:table|code|example|diagram) *\)$/i )
+                {
+                $inCodeSection = 1;
+                }
+            elsif ($inCodeSection &&
+                    $commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i)
+                {
+                $inCodeSection = 0;
+                }
+            }
+
+
+        $index++;
+        };
+
+    };
+
+
+
+###############################################################################
+# Group: Processing Functions
+
+
+#
+#   Function: RepairPackages
+#
+#   Recalculates the packages for all comment topics using the auto-topics and the scope record.  Call this *before* calling
+#   <MergeAutoTopics()>.
+#
+#   Parameters:
+#
+#       autoTopics - A reference to the list of automatically generated <NaturalDocs::Parser::ParsedTopics>.
+#       scopeRecord - A reference to an array of <NaturalDocs::Languages::Advanced::ScopeChanges>.
+#
+sub RepairPackages #(autoTopics, scopeRecord)
+    {
+    my ($self, $autoTopics, $scopeRecord) = @_;
+
+    my $topicIndex = 0;
+    my $autoTopicIndex = 0;
+    my $scopeIndex = 0;
+    # The current entry of each of the three lists.  Each becomes undef once its list has been walked past the end.
+    my $topic = $parsedFile[0];
+    my $autoTopic = $autoTopics->[0];
+    my $scopeChange = $scopeRecord->[0];
+    # $inFakePackage is set while $currentPackage was taken from a scope-altering comment topic (see below).
+    my $currentPackage;
+    my $inFakePackage;
+    # Handle whichever entry comes first by line number.  The loop ends when the comment topics run out.
+    while (defined $topic)
+        {
+        # First update the scope via the record if it's defined and has the lowest line number.
+        if (defined $scopeChange &&
+            $scopeChange->LineNumber() <= $topic->LineNumber() &&
+            (!defined $autoTopic || $scopeChange->LineNumber() <= $autoTopic->LineNumber()) )
+            {
+            $currentPackage = $scopeChange->Scope();
+            $scopeIndex++;
+            $scopeChange = $scopeRecord->[$scopeIndex];  # Will be undef when past end.
+            $inFakePackage = undef;
+            }
+
+        # Next try to end a fake scope with an auto topic if it's defined and has the lowest line number.
+        elsif (defined $autoTopic &&
+                $autoTopic->LineNumber() <= $topic->LineNumber())
+            {
+            if ($inFakePackage)
+                {
+                $currentPackage = $autoTopic->Package();
+                $inFakePackage = undef;
+                };
+
+            $autoTopicIndex++;
+            $autoTopic = $autoTopics->[$autoTopicIndex];  # Will be undef when past end.
+            }
+
+
+        # Finally try to handle the topic, since it has the lowest line number.  Check for Type() because headerless topics won't have
+        # one.
+        else
+            {
+            my $scope;
+            if ($topic->Type())
+                {  $scope = NaturalDocs::Topics->TypeInfo($topic->Type())->Scope();  }
+            else
+                {  $scope = ::SCOPE_NORMAL();  };
+
+            if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END())
+                {
+                # They should already have the correct class and scope.
+                $currentPackage = $topic->Package();
+                $inFakePackage = 1;
+                }
+           else
+                {
+                # Fix the package of everything else.
+
+                # Note that the first function or variable topic to appear in a fake package will assume that package even if it turns out
+                # to be incorrect in the actual code, since the topic will come before the auto-topic.  This will be corrected in
+                # MergeAutoTopics().
+
+                $topic->SetPackage($currentPackage);
+                };
+
+            $topicIndex++;
+            $topic = $parsedFile[$topicIndex];  # Will be undef when past end.
+            };
+        };
+
+    };
+
+
+#
+#   Function: MergeAutoTopics
+#
+#   Merges the automatically generated topics into the file.  If an auto-topic matches an existing topic, it will have its prototype
+#   and package transferred.  If it doesn't, the auto-topic will be inserted into the list unless
+#   <NaturalDocs::Settings->DocumentedOnly()> is set.  If an existing topic doesn't have a title, it's assumed to be a headerless
+#   comment and will be merged with the next auto-topic or discarded.
+#
+#   Parameters:
+#
+#       language - The <NaturalDocs::Languages::Base>-derived class for the file.
+#       autoTopics - A reference to the list of automatically generated topics.
+#
+sub MergeAutoTopics #(language, autoTopics)
+    {
+    my ($self, $language, $autoTopics) = @_;
+
+    my $topicIndex = 0;
+    my $autoTopicIndex = 0;
+
+    # Keys are topic types, values are existence hashrefs of titles.
+    my %topicsInLists;
+
+    while ($topicIndex < scalar @parsedFile && $autoTopicIndex < scalar @$autoTopics)
+        {
+        my $topic = $parsedFile[$topicIndex];
+        my $autoTopic = $autoTopics->[$autoTopicIndex];
+        # The clean title is the title minus everything from its last opening parenthesis on, and the whitespace before it.
+        my $cleanTitle = $topic->Title();
+        $cleanTitle =~ s/[\t ]*\([^\(]*$//;
+
+        # Add the auto-topic if it's higher in the file than the current topic.
+        if ($autoTopic->LineNumber() < $topic->LineNumber())
+            {
+            if (exists $topicsInLists{$autoTopic->Type()} &&
+                exists $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()})
+                {
+                # Remove it from the list so a second one with the same name will be added.
+                delete $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()};
+                }
+            elsif (!NaturalDocs::Settings->DocumentedOnly())
+                {
+                splice(@parsedFile, $topicIndex, 0, $autoTopic);
+                $topicIndex++;
+                };
+
+            $autoTopicIndex++;
+            }
+
+        # Remove a headerless topic if there's another topic between it and the next auto-topic.
+        elsif (!$topic->Title() && $topicIndex + 1 < scalar @parsedFile &&
+                $parsedFile[$topicIndex+1]->LineNumber() < $autoTopic->LineNumber())
+            {
+            splice(@parsedFile, $topicIndex, 1);
+            }
+        # Transfer information if we have a match or a headerless topic.  A match needs the same topic type, compared as a
+        # string like everywhere else in this package, and the clean title appearing somewhere in the auto-topic's title.
+        elsif ( !$topic->Title() || ($topic->Type() eq $autoTopic->Type() && index($autoTopic->Title(), $cleanTitle) != -1) )
+            {
+            $topic->SetType($autoTopic->Type());
+            $topic->SetPrototype($autoTopic->Prototype());
+            $topic->SetUsing($autoTopic->Using());
+
+            if (!$topic->Title())
+                {  $topic->SetTitle($autoTopic->Title());  };
+
+            if (NaturalDocs::Topics->TypeInfo($topic->Type())->Scope() != ::SCOPE_START())
+                {  $topic->SetPackage($autoTopic->Package());  }
+            elsif ($autoTopic->Package() ne $topic->Package())
+                {
+                my @autoPackageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($autoTopic->Package());
+                my @packageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($topic->Package());
+
+                while (scalar @autoPackageIdentifiers && $autoPackageIdentifiers[-1] eq $packageIdentifiers[-1])
+                    {
+                    pop @autoPackageIdentifiers;
+                    pop @packageIdentifiers;
+                    };
+
+                if (scalar @autoPackageIdentifiers)
+                    {  $topic->SetPackage( NaturalDocs::SymbolString->Join(@autoPackageIdentifiers) );  };
+                };
+
+            $topicIndex++;
+            $autoTopicIndex++;
+            }
+
+        # Extract topics in lists.
+        elsif ($topic->IsList())
+            {
+            if (!exists $topicsInLists{$topic->Type()})
+                {  $topicsInLists{$topic->Type()} = { };  };
+
+            my $body = $topic->Body();
+
+            while ($body =~ /<ds>([^<]+)<\/ds>/g)
+                {  $topicsInLists{$topic->Type()}->{NaturalDocs::NDMarkup->RestoreAmpChars($1)} = 1;  };
+
+            $topicIndex++;
+            }
+
+        # Otherwise there's no match.  Skip the topic.  The auto-topic will be added later.
+        else
+            {
+            $topicIndex++;
+            }
+        };
+
+    # Add any auto-topics remaining.
+    if (!NaturalDocs::Settings->DocumentedOnly())
+        {
+        while ($autoTopicIndex < scalar @$autoTopics)
+            {
+            my $autoTopic = $autoTopics->[$autoTopicIndex];
+
+            if (exists $topicsInLists{$autoTopic->Type()} &&
+                exists $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()})
+                {
+                # Remove it from the list so a second one with the same name will be added.
+                delete $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()};
+                }
+            else
+                {
+                push(@parsedFile, $autoTopic);
+                };
+
+            $autoTopicIndex++;
+            };
+        };
+    };
+
+
+#
+#   Function: RemoveRemainingHeaderlessTopics
+#
+#   After <MergeAutoTopics()> is done, this function removes any remaining headerless topics from the file.  If they don't merge
+#   into anything, they're not valid topics.
+#
+sub RemoveRemainingHeaderlessTopics
+    {
+    my ($self) = @_;
+
+    # Filter in place rather than splicing entry by entry.  Assigning back to @parsedFile keeps the same array, so
+    # references to it that were handed out earlier stay valid.
+    #
+    # A topic is kept when its Title() is true.  Anything still without a title at this point did not merge into
+    # anything.
+    @parsedFile = grep { $_->Title() } @parsedFile;
+
+    return;
+    };
+
+
+#
+#   Function: MakeAutoGroups
+#
+#   Creates group topics for files that do not have them.
+#
+sub MakeAutoGroups
+    {
+    my ($self) = @_;
+
+    # Files with fewer than two topics don't get groups.
+    return  if (scalar @parsedFile < 2);
+
+    # $stretchStart and $position delimit the stretch of topics being collected; $position itself is not part of it.
+    # The first entry is left out of the first stretch when it serves as the page title.
+    my $stretchStart = 0;
+    if (NaturalDocs::Topics->TypeInfo( $parsedFile[0]->Type() )->PageTitleIfFirst())
+        {  $stretchStart = 1;  };
+
+    my $position = $stretchStart;
+
+    # Every scope-altering topic closes the stretch before it and opens a new one right after it.
+    for (; $position < scalar @parsedFile; $position++)
+        {
+        my $scope = NaturalDocs::Topics->TypeInfo($parsedFile[$position]->Type())->Scope();
+
+        if ($scope != ::SCOPE_START() && $scope != ::SCOPE_END())
+            {  next;  };
+
+        # <MakeAutoGroupsFor()> returns the number of group topics it added.  Moving forward by that amount accounts for
+        # them before the next stretch is started.
+        if ($position > $stretchStart)
+            {  $position += $self->MakeAutoGroupsFor($stretchStart, $position);  };
+
+        $stretchStart = $position + 1;
+        };
+
+
+    # Whatever follows the last scope-altering topic forms the final stretch.
+
+    $self->MakeAutoGroupsFor($stretchStart, $position)  if ($position > $stretchStart);
+
+    };
+
+
+#
+#   Function: MakeAutoGroupsFor
+#
+#   Creates group topics for sections of files that do not have them.  A support function for <MakeAutoGroups()>.
+#
+#   The stretch is first divided into runs of consecutive topics of the same type, where a generic topic joins the run already
+#   in progress.  Runs that bounce between two types which can group together are then merged if they qualify as "noise".
+#   Finally a group topic is inserted into the parsed file in front of every run whose type is not generic.
+#
+#   Parameters:
+#
+#       startIndex - The index to start at.
+#       endIndex - The index to end at.  Not inclusive.
+#
+#   Returns:
+#
+#       The number of group topics added.  This is always a whole number and equals the distance by which the topics at and
+#       after endIndex were shifted.
+#
+sub MakeAutoGroupsFor #(startIndex, endIndex)
+    {
+    my ($self, $startIndex, $endIndex) = @_;
+
+    # No groups if any are defined already.
+    for (my $i = $startIndex; $i < $endIndex; $i++)
+        {
+        if ($parsedFile[$i]->Type() eq ::TOPIC_GROUP())
+            {  return 0;  };
+        };
+
+
+    use constant COUNT => 0;
+    use constant TYPE => 1;
+    use constant SECOND_TYPE => 2;
+    use constant SIZE => 3;
+
+    # This is a flat array of ( count, type, secondType ) triples.  Count and Type will always be filled in; count is the number
+    # of consecutive topics of type.  On the second pass, if small groups are combined secondType will be filled in.  There will
+    # not be more than two types per group.  Because secondType is optional, the final triple may physically occupy only two
+    # slots, so the array length is not necessarily a multiple of SIZE.
+    my @groups;
+    my $groupIndex = 0;
+
+
+    # First pass: Determine all the groups.
+
+    my $i = $startIndex;
+    my $currentType;
+
+    while ($i < $endIndex)
+        {
+        # A new group starts on the first topic and whenever the type changes to something other than generic.
+        if (!defined $currentType || ($parsedFile[$i]->Type() ne $currentType && $parsedFile[$i]->Type() ne ::TOPIC_GENERIC()) )
+            {
+            if (defined $currentType)
+                {  $groupIndex += SIZE;  };
+
+            $currentType = $parsedFile[$i]->Type();
+
+            $groups[$groupIndex + COUNT] = 1;
+            $groups[$groupIndex + TYPE] = $currentType;
+            }
+        else
+            {  $groups[$groupIndex + COUNT]++;  };
+
+        $i++;
+        };
+
+
+    # Second pass: Combine groups based on "noise".  Noise means types go from A to B to A at least once, and there are at least
+    # two groups in a row with three or less, and at least one of those groups is two or less.  So 3, 3, 3 doesn't count as noise, but
+    # 3, 2, 3 does.
+
+    $groupIndex = 0;
+
+    # While there are at least three groups left...
+    while ($groupIndex < scalar @groups - (2 * SIZE))
+        {
+        # If the group two places in front of this one has the same type...
+        if ($groups[$groupIndex + (2 * SIZE) + TYPE] eq $groups[$groupIndex + TYPE])
+            {
+            # It means we went from A to B to A, which partially qualifies as noise.
+
+            my $firstType = $groups[$groupIndex + TYPE];
+            my $secondType = $groups[$groupIndex + SIZE + TYPE];
+
+            if (NaturalDocs::Topics->TypeInfo($firstType)->CanGroupWith($secondType) ||
+                NaturalDocs::Topics->TypeInfo($secondType)->CanGroupWith($firstType))
+                {
+                my $hasNoise;
+
+                my $hasThrees;
+                my $hasTwosOrOnes;
+
+                # Index into @groups just past the last group of the A/B run.  Named distinctly so it doesn't shadow the
+                # endIndex parameter, which refers to the parsed file instead.
+                my $noiseEndIndex = $groupIndex;
+
+                while ($noiseEndIndex < scalar @groups &&
+                         ($groups[$noiseEndIndex + TYPE] eq $firstType || $groups[$noiseEndIndex + TYPE] eq $secondType))
+                    {
+                    if ($groups[$noiseEndIndex + COUNT] > 3)
+                        {
+                        # They must be consecutive to count.
+                        $hasThrees = 0;
+                        $hasTwosOrOnes = 0;
+                        }
+                    elsif ($groups[$noiseEndIndex + COUNT] == 3)
+                        {
+                        $hasThrees = 1;
+
+                        if ($hasTwosOrOnes)
+                            {  $hasNoise = 1;  };
+                        }
+                    else # < 3
+                        {
+                        if ($hasThrees || $hasTwosOrOnes)
+                            {  $hasNoise = 1;  };
+
+                        $hasTwosOrOnes = 1;
+                        };
+
+                    $noiseEndIndex += SIZE;
+                    };
+
+                if (!$hasNoise)
+                    {
+                    # Resume from the last group of the run, since it may begin another A-B-A pattern.
+                    $groupIndex = $noiseEndIndex - SIZE;
+                    }
+                else # hasNoise
+                    {
+                    # Fold the entire run into its first group and remember the second type for the title.
+                    $groups[$groupIndex + SECOND_TYPE] = $secondType;
+
+                    for (my $noiseIndex = $groupIndex + SIZE; $noiseIndex < $noiseEndIndex; $noiseIndex += SIZE)
+                        {
+                        $groups[$groupIndex + COUNT] += $groups[$noiseIndex + COUNT];
+                        };
+
+                    splice(@groups, $groupIndex + SIZE, $noiseEndIndex - $groupIndex - SIZE);
+
+                    $groupIndex += SIZE;
+                    };
+                }
+
+            else # They can't group together
+                {
+                $groupIndex += SIZE;
+                };
+            }
+
+        else
+            {  $groupIndex += SIZE;  };
+        };
+
+
+    # Finally, create group topics for the parsed file.
+
+    $groupIndex = 0;
+    $i = $startIndex;
+
+    my $groupTopicsAdded = 0;
+
+    while ($groupIndex < scalar @groups)
+        {
+        # Generic topics don't get a group heading of their own.
+        if ($groups[$groupIndex + TYPE] ne ::TOPIC_GENERIC())
+            {
+            my $topic = $parsedFile[$i];
+            my $title = NaturalDocs::Topics->NameOfType($groups[$groupIndex + TYPE], 1);
+
+            if (defined $groups[$groupIndex + SECOND_TYPE])
+                {  $title .= ' and ' . NaturalDocs::Topics->NameOfType($groups[$groupIndex + SECOND_TYPE], 1);  };
+
+            # The group topic borrows its package, using and line number from the first topic it heads.
+            splice(@parsedFile, $i, 0, NaturalDocs::Parser::ParsedTopic->New(::TOPIC_GROUP(),
+                                                                             $title,
+                                                                             $topic->Package(), $topic->Using(),
+                                                                             undef, undef, undef,
+                                                                             $topic->LineNumber()) );
+            $groupTopicsAdded++;
+
+            # Step over the group topic that was just inserted.
+            $i++;
+            };
+
+        $i += $groups[$groupIndex + COUNT];
+        $groupIndex += SIZE;
+        };
+
+    # Report the actual number of insertions.  Dividing the length of @groups by SIZE is not reliable: the length usually isn't
+    # a multiple of SIZE, which gives a fractional result, and it would also count a generic group that received no group
+    # topic.  The caller adds this value to its index into the parsed file, so it has to be exact.
+    return $groupTopicsAdded;
+    };
+
+
+#
+#   Function: AddToClassHierarchy
+#
+#   Reports every class topic in the parsed file through <OnClass()>.  This is needed because a topic that never matched up
+#   to an auto-topic may not have been reported that way yet.
+#
+sub AddToClassHierarchy
+    {
+    my ($self) = @_;
+
+    foreach my $topic (@parsedFile)
+        {
+        # Only typed topics whose type takes part in the class hierarchy are of interest.
+        next unless ($topic->Type() && NaturalDocs::Topics->TypeInfo( $topic->Type() )->ClassHierarchy());
+
+        # A regular topic stands for a single class, identified by its package.
+        if (!$topic->IsList())
+            {
+            $self->OnClass($topic->Package());
+            next;
+            };
+
+        # A list topic stands for one class per <ds> entry in its body.
+        my $listBody = $topic->Body();
+
+        while ($listBody =~ /<ds>([^<]+)<\/ds>/g)
+            {
+            $self->OnClass( NaturalDocs::SymbolString->FromText( NaturalDocs::NDMarkup->RestoreAmpChars($1) ) );
+            };
+        };
+    };
+
+
+#
+#   Function: AddPackageDelineators
+#
+#   Adds section and class topics to make sure the package is correctly represented in the documentation.  Should be called last in
+#   this process.
+#
+#   Whenever the package changes on a topic whose scope is <SCOPE_END>, a delineating topic is inserted in front of it: a
+#   "Global" section if the new package is undefined, a "(continued)" topic if the package was already seen in this file, or
+#   a new class topic otherwise.  Package changes on <SCOPE_START> topics are only recorded, since such a topic already
+#   serves as the delineator.
+#
+sub AddPackageDelineators
+    {
+    my ($self) = @_;
+
+    my $index = 0;
+    my $currentPackage;
+
+    # Values are the arrayref [ title, type ];
+    my %usedPackages;
+
+    while ($index < scalar @parsedFile)
+        {
+        my $topic = $parsedFile[$index];
+
+        if ($topic->Package() ne $currentPackage)
+            {
+            $currentPackage = $topic->Package();
+            my $scopeType = NaturalDocs::Topics->TypeInfo($topic->Type())->Scope();
+
+            if ($scopeType == ::SCOPE_START())
+                {
+                # Remember how the package was introduced so a later continuation can reuse its title and type.
+                $usedPackages{$currentPackage} = [ $topic->Title(), $topic->Type() ];
+                }
+            elsif ($scopeType == ::SCOPE_END())
+                {
+                my $newTopic;
+
+                if (!defined $currentPackage)
+                    {
+                    $newTopic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_SECTION(), 'Global',
+                                                                      undef, undef,
+                                                                      undef, undef, undef,
+                                                                      $topic->LineNumber(), undef);
+                    }
+                else
+                    {
+                    my ($title, $body, $summary, $type);
+                    my @packageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($currentPackage);
+
+                    if (exists $usedPackages{$currentPackage})
+                        {
+                        $title = $usedPackages{$currentPackage}->[0];
+                        $type = $usedPackages{$currentPackage}->[1];
+                        $body = '<p>(continued)</p>';
+                        $summary = '(continued)';
+                        }
+                    else
+                        {
+                        $title = join($language->PackageSeparator(), @packageIdentifiers);
+                        $type = ::TOPIC_CLASS();
+
+                        # Body and summary stay undef.
+
+                        # Store the same [ title, type ] pair as the SCOPE_START branch does.  Storing only the title string
+                        # would make the ->[0] and ->[1] lookups above dereference a plain string the next time this package
+                        # is continued.
+                        $usedPackages{$currentPackage} = [ $title, $type ];
+                        };
+
+                    # The new topic's package is the current package minus the identifiers that make up the title.
+                    my @titleIdentifiers = NaturalDocs::SymbolString->IdentifiersOf( NaturalDocs::SymbolString->FromText($title) );
+                    for (my $i = 0; $i < scalar @titleIdentifiers; $i++)
+                        {  pop @packageIdentifiers;  };
+
+                    $newTopic = NaturalDocs::Parser::ParsedTopic->New($type, $title,
+                                                                      NaturalDocs::SymbolString->Join(@packageIdentifiers), undef,
+                                                                      undef, $summary, $body,
+                                                                      $topic->LineNumber(), undef);
+                    }
+
+                # Insert the delineator in front of the current topic and step past it.
+                splice(@parsedFile, $index, 0, $newTopic);
+                $index++;
+                }
+            };
+
+        $index++;
+        };
+    };
+
+
+#
+#   Function: BreakLists
+#
+#   Breaks list topics into individual topics.
+#
+#   Only list topics whose type has <BreakLists()> set are affected.  Every <ds>/<dd> pair inside the topic's <dl> lists
+#   becomes its own topic of the same type.  Any body text outside of the lists is kept in a group topic which carries the
+#   original title and is placed in front of the new topics.  If no such text remains, the list topic is simply replaced by
+#   its entries.
+#
+sub BreakLists
+    {
+    my $self = shift;
+
+    my $index = 0;
+
+    while ($index < scalar @parsedFile)
+        {
+        my $topic = $parsedFile[$index];
+
+        if ($topic->IsList() && NaturalDocs::Topics->TypeInfo( $topic->Type() )->BreakLists())
+            {
+            my $body = $topic->Body();
+
+            my @newTopics;
+            my $newBody;
+
+            my $bodyIndex = 0;
+
+            for (;;)
+                {
+                my $startList = index($body, '<dl>', $bodyIndex);
+
+                if ($startList == -1)
+                    {  last;  };
+
+                # Everything between the previous list and this one stays in the body.
+                $newBody .= substr($body, $bodyIndex, $startList - $bodyIndex);
+
+                my $endList = index($body, '</dl>', $startList);
+                my $resumeIndex;
+
+                if ($endList == -1)
+                    {
+                    # Unterminated list.  Treat the rest of the body as the list; otherwise the scan position would be reset
+                    # to a point in front of this <dl> and the loop might never finish.
+                    $endList = length($body);
+                    $resumeIndex = $endList;
+                    }
+                else
+                    {  $resumeIndex = $endList + length('</dl>');  };
+
+                my $listBody = substr($body, $startList, $endList - $startList);
+
+                while ($listBody =~ /<ds>([^<]+)<\/ds><dd>(.*?)<\/dd>/g)
+                    {
+                    my ($symbol, $description) = ($1, $2);
+
+                    push @newTopics, NaturalDocs::Parser::ParsedTopic->New( $topic->Type(), $symbol, $topic->Package(),
+                                                                            $topic->Using(), undef,
+                                                                            $self->GetSummaryFromDescriptionList($description),
+                                                                            '<p>' . $description .  '</p>', $topic->LineNumber(),
+                                                                            undef );
+                    };
+
+                $bodyIndex = $resumeIndex;
+                };
+
+            $newBody .= substr($body, $bodyIndex);
+
+            # Remove trailing headings.
+            $newBody =~ s/(?:<h>[^<]+<\/h>)+$//;
+
+            # Remove empty headings.
+            $newBody =~ s/(?:<h>[^<]+<\/h>)+(<h>[^<]+<\/h>)/$1/g;
+
+            if ($newBody)
+                {
+                unshift @newTopics, NaturalDocs::Parser::ParsedTopic->New( ::TOPIC_GROUP(), $topic->Title(), $topic->Package(),
+                                                                           $topic->Using(), undef,
+                                                                           $self->GetSummaryFromBody($newBody), $newBody,
+                                                                           $topic->LineNumber(), undef );
+                };
+
+            # Swap the list topic for its replacements and continue after them.
+            splice(@parsedFile, $index, 1, @newTopics);
+
+            $index += scalar @newTopics;
+            }
+
+        else # not a list
+            {  $index++;  };
+        };
+    };
+
+
+#
+#   Function: GetSummaryFromBody
+#
+#   Extracts the summary, which is the first sentence of the leading paragraph, from a topic body.
+#
+#   Parameters:
+#
+#       body - The complete topic body, in <NDMarkup>.
+#
+#   Returns:
+#
+#       The topic summary, or undef if the body does not begin with a paragraph.
+#
+sub GetSummaryFromBody #(body)
+    {
+    my ($self, $body) = @_;
+
+    # The paragraph must come first.  A single header in front of it is tolerated, but nothing else.  The sentence ends at the
+    # close of the paragraph or at sentence punctuation followed by a closing character, whichever comes first.
+    $body =~ /^(?:<h>[^<]*<\/h>)?<p>(.*?)(<\/p>|[\.\!\?](?:[\)\}\'\ ]|&quot;|&gt;))/x
+        or return undef;
+
+    my ($sentence, $terminator) = ($1, $2);
+
+    # The punctuation belongs to the summary, the closing paragraph tag doesn't.
+    return ($terminator eq '</p>' ? $sentence : $sentence . $terminator);
+    };
+
+
+#
+#   Function: GetSummaryFromDescriptionList
+#
+#   Extracts the summary, which is the first sentence, from a description list entry.
+#
+#   Parameters:
+#
+#       description - The description in <NDMarkup>.  Should be the content between the <dd></dd> tags only.
+#
+#   Returns:
+#
+#       The description summary, or undef if none.
+#
+sub GetSummaryFromDescriptionList #(description)
+    {
+    my ($self, $description) = @_;
+
+    # The sentence ends at the end of the description or at sentence punctuation followed by a closing character, whichever
+    # comes first.
+    return undef
+        unless ($description =~ /^(.*?)($|[\.\!\?](?:[\)\}\'\ ]|&quot;|&gt;))/);
+
+    # Unlike a paragraph tag, everything that ended the sentence is part of the summary.
+    return $1 . $2;
+    };
+
+
+1;  # A module file has to end with a true value so that loading it with use/require succeeds.
author	Magnus Auvinen <magnus.auvinen@gmail.com>	2008-08-02 08:21:29 +0000
committer	Magnus Auvinen <magnus.auvinen@gmail.com>	2008-08-02 08:21:29 +0000
commit	61bfe2d70cae6be8c4086a210a5451135ccca9ea (patch)
tree	62bf7808b1b2bfe5f56fe1e329871fb0991d0687 /docs/tool/Modules/NaturalDocs/Parser.pm
parent	a13b94f9e0bca8ea892311d9d9e0c0bc48616ea7 (diff)
download	zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.tar.gz zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.zip