diff options
| author | Magnus Auvinen <magnus.auvinen@gmail.com> | 2008-08-02 08:21:29 +0000 |
|---|---|---|
| committer | Magnus Auvinen <magnus.auvinen@gmail.com> | 2008-08-02 08:21:29 +0000 |
| commit | 61bfe2d70cae6be8c4086a210a5451135ccca9ea (patch) | |
| tree | 62bf7808b1b2bfe5f56fe1e329871fb0991d0687 /docs/tool/Modules/NaturalDocs/Parser.pm | |
| parent | a13b94f9e0bca8ea892311d9d9e0c0bc48616ea7 (diff) | |
| download | zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.tar.gz zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.zip | |
added doc tool
Diffstat (limited to 'docs/tool/Modules/NaturalDocs/Parser.pm')
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Parser.pm | 1331 |
1 files changed, 1331 insertions, 0 deletions
diff --git a/docs/tool/Modules/NaturalDocs/Parser.pm b/docs/tool/Modules/NaturalDocs/Parser.pm new file mode 100644 index 00000000..e88cd289 --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Parser.pm @@ -0,0 +1,1331 @@ +############################################################################### +# +# Package: NaturalDocs::Parser +# +############################################################################### +# +# A package that coordinates source file parsing between the <NaturalDocs::Languages::Base>-derived objects and its own +# sub-packages such as <NaturalDocs::Parser::Native>. Also handles sending symbols to <NaturalDocs::SymbolTable> and +# other generic topic processing. +# +# Usage and Dependencies: +# +# - Prior to use, <NaturalDocs::Settings>, <NaturalDocs::Languages>, <NaturalDocs::Project>, <NaturalDocs::SymbolTable>, +# and <NaturalDocs::ClassHierarchy> must be initialized. <NaturalDocs::SymbolTable> and <NaturalDocs::ClassHierarchy> +# do not have to be fully resolved. +# +# - Aside from that, the package is ready to use right away. It does not have its own initialization function. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use NaturalDocs::Parser::ParsedTopic; +use NaturalDocs::Parser::Native; +use NaturalDocs::Parser::JavaDoc; + +use strict; +use integer; + +package NaturalDocs::Parser; + + + +############################################################################### +# Group: Variables + + +# +# var: sourceFile +# +# The source <FileName> currently being parsed. +# +my $sourceFile; + +# +# var: language +# +# The language object for the file, derived from <NaturalDocs::Languages::Base>. +# +my $language; + +# +# Array: parsedFile +# +# An array of <NaturalDocs::Parser::ParsedTopic> objects. 
+# +my @parsedFile; + + +# +# bool: parsingForInformation +# Whether <ParseForInformation()> was called. If false, then <ParseForBuild()> was called. +# +my $parsingForInformation; + + + +############################################################################### +# Group: Functions + +# +# Function: ParseForInformation +# +# Parses the input file for information. Will update the information about the file in <NaturalDocs::SymbolTable> and +# <NaturalDocs::Project>. +# +# Parameters: +# +# file - The <FileName> to parse. +# +sub ParseForInformation #(file) + { + my ($self, $file) = @_; + $sourceFile = $file; + + $parsingForInformation = 1; + + # Watch this parse so we detect any changes. + NaturalDocs::SymbolTable->WatchFileForChanges($sourceFile); + NaturalDocs::ClassHierarchy->WatchFileForChanges($sourceFile); + NaturalDocs::SourceDB->WatchFileForChanges($sourceFile); + + my $defaultMenuTitle = $self->Parse(); + + foreach my $topic (@parsedFile) + { + # Add a symbol for the topic. + + my $type = $topic->Type(); + if ($type eq ::TOPIC_ENUMERATION()) + { $type = ::TOPIC_TYPE(); }; + + NaturalDocs::SymbolTable->AddSymbol($topic->Symbol(), $sourceFile, $type, + $topic->Prototype(), $topic->Summary()); + + + # You can't put the function call directly in a while with a regex. It has to sit in a variable to work. + my $body = $topic->Body(); + + + # If it's a list or enum topic, add a symbol for each description list entry. + + if ($topic->IsList() || $topic->Type() eq ::TOPIC_ENUMERATION()) + { + # We'll hijack the enum constants to apply to non-enum behavior too. 
+ my $behavior; + + if ($topic->Type() eq ::TOPIC_ENUMERATION()) + { + $type = ::TOPIC_CONSTANT(); + $behavior = $language->EnumValues(); + } + elsif (NaturalDocs::Topics->TypeInfo($topic->Type())->Scope() == ::SCOPE_ALWAYS_GLOBAL()) + { + $behavior = ::ENUM_GLOBAL(); + } + else + { + $behavior = ::ENUM_UNDER_PARENT(); + }; + + while ($body =~ /<ds>([^<]+)<\/ds><dd>(.*?)<\/dd>/g) + { + my ($listTextSymbol, $listSummary) = ($1, $2); + + $listTextSymbol = NaturalDocs::NDMarkup->RestoreAmpChars($listTextSymbol); + my $listSymbol = NaturalDocs::SymbolString->FromText($listTextSymbol); + + if ($behavior == ::ENUM_UNDER_PARENT()) + { $listSymbol = NaturalDocs::SymbolString->Join($topic->Package(), $listSymbol); } + elsif ($behavior == ::ENUM_UNDER_TYPE()) + { $listSymbol = NaturalDocs::SymbolString->Join($topic->Symbol(), $listSymbol); }; + + NaturalDocs::SymbolTable->AddSymbol($listSymbol, $sourceFile, $type, undef, + $self->GetSummaryFromDescriptionList($listSummary)); + }; + }; + + + # Add references in the topic. + + while ($body =~ /<link target=\"([^\"]*)\" name=\"[^\"]*\" original=\"[^\"]*\">/g) + { + my $linkText = NaturalDocs::NDMarkup->RestoreAmpChars($1); + my $linkSymbol = NaturalDocs::SymbolString->FromText($linkText); + + NaturalDocs::SymbolTable->AddReference(::REFERENCE_TEXT(), $linkSymbol, + $topic->Package(), $topic->Using(), $sourceFile); + }; + + + # Add images in the topic. + + while ($body =~ /<img mode=\"[^\"]*\" target=\"([^\"]+)\" original=\"[^\"]*\">/g) + { + my $target = NaturalDocs::NDMarkup->RestoreAmpChars($1); + NaturalDocs::ImageReferenceTable->AddReference($sourceFile, $target); + }; + }; + + # Handle any changes to the file. + NaturalDocs::ClassHierarchy->AnalyzeChanges(); + NaturalDocs::SymbolTable->AnalyzeChanges(); + NaturalDocs::SourceDB->AnalyzeWatchedFileChanges(); + + # Update project on the file's characteristics. 
+ my $hasContent = (scalar @parsedFile > 0); + + NaturalDocs::Project->SetHasContent($sourceFile, $hasContent); + if ($hasContent) + { NaturalDocs::Project->SetDefaultMenuTitle($sourceFile, $defaultMenuTitle); }; + + # We don't need to keep this around. + @parsedFile = ( ); + }; + + +# +# Function: ParseForBuild +# +# Parses the input file for building, returning it as a <NaturalDocs::Parser::ParsedTopic> arrayref. +# +# Note that all new and changed files should be parsed for symbols via <ParseForInformation()> before calling this function on +# *any* file. The reason is that <NaturalDocs::SymbolTable> needs to know about all the symbol definitions and references to +# resolve them properly. +# +# Parameters: +# +# file - The <FileName> to parse for building. +# +# Returns: +# +# An arrayref of the source file as <NaturalDocs::Parser::ParsedTopic> objects. +# +sub ParseForBuild #(file) + { + my ($self, $file) = @_; + $sourceFile = $file; + + $parsingForInformation = undef; + + $self->Parse(); + + return \@parsedFile; + }; + + + + +############################################################################### +# Group: Interface Functions + + +# +# Function: OnComment +# +# The function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a comment +# suitable for documentation. +# +# Parameters: +# +# commentLines - An arrayref of the comment's lines. The language's comment symbols should be converted to spaces, +# and there should be no line break characters at the end of each line. *The original memory will be +# changed.* +# lineNumber - The line number of the first of the comment lines. +# isJavaDoc - Whether the comment is in JavaDoc format. +# +# Returns: +# +# The number of topics created by this comment, or zero if none. 
#
sub OnComment #(string[] commentLines, int lineNumber, bool isJavaDoc)
    {
    my ($self, $commentLines, $lineNumber, $isJavaDoc) = @_;

    $self->CleanComment($commentLines);

    # We check if it's definitely Natural Docs content first.  This overrides all else, since it's possible that a comment could start
    # with a topic line yet have something that looks like a JavaDoc tag.  Natural Docs wins in this case.
    if (NaturalDocs::Parser::Native->IsMine($commentLines, $isJavaDoc))
        {  return NaturalDocs::Parser::Native->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile);  }

    elsif (NaturalDocs::Parser::JavaDoc->IsMine($commentLines, $isJavaDoc))
        {  return NaturalDocs::Parser::JavaDoc->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile);  }

    # If the content is ambiguous and it's a JavaDoc-styled comment, treat it as Natural Docs content.
    elsif ($isJavaDoc)
        {  return NaturalDocs::Parser::Native->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile);  };

    # Nobody claimed the comment, so no topics were created.  Return the documented zero explicitly; falling off the end of the
    # if-chain would hand the caller whatever the last condition happened to evaluate to instead of a number.
    return 0;
    };


#
#   Function: OnClass
#
#   A function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a class declaration.
#
#   Parameters:
#
#       class - The <SymbolString> of the class encountered.
#
sub OnClass #(class)
    {
    my ($self, $class) = @_;

    # The hierarchy is only updated on the information pass.
    if ($parsingForInformation)
        {  NaturalDocs::ClassHierarchy->AddClass($sourceFile, $class);  };
    };


#
#   Function: OnClassParent
#
#   A function called by <NaturalDocs::Languages::Base>-derived objects when their parsers encounter a declaration of
#   inheritance.
#
#   Parameters:
#
#       class - The <SymbolString> of the class we're in.
#       parent - The <SymbolString> of the class it inherits.
#       scope - The package <SymbolString> that the reference appeared in.
#       using - An arrayref of package <SymbolStrings> that the reference has access to via "using" statements.
#       resolvingFlags - Any <Resolving Flags> to be used when resolving the reference.
<RESOLVE_NOPLURAL> is added +# automatically since that would never apply to source code. +# +sub OnClassParent #(class, parent, scope, using, resolvingFlags) + { + my ($self, $class, $parent, $scope, $using, $resolvingFlags) = @_; + + if ($parsingForInformation) + { + NaturalDocs::ClassHierarchy->AddParentReference($sourceFile, $class, $parent, $scope, $using, + $resolvingFlags | ::RESOLVE_NOPLURAL()); + }; + }; + + + +############################################################################### +# Group: Support Functions + + +# Function: Parse +# +# Opens the source file and parses process. Most of the actual parsing is done in <NaturalDocs::Languages::Base->ParseFile()> +# and <OnComment()>, though. +# +# *Do not call externally.* Rather, call <ParseForInformation()> or <ParseForBuild()>. +# +# Returns: +# +# The default menu title of the file. Will be the <FileName> if nothing better is found. +# +sub Parse + { + my ($self) = @_; + + NaturalDocs::Error->OnStartParsing($sourceFile); + + $language = NaturalDocs::Languages->LanguageOf($sourceFile); + NaturalDocs::Parser::Native->Start(); + @parsedFile = ( ); + + my ($autoTopics, $scopeRecord) = $language->ParseFile($sourceFile, \@parsedFile); + + + $self->AddToClassHierarchy(); + + $self->BreakLists(); + + if (defined $autoTopics) + { + if (defined $scopeRecord) + { $self->RepairPackages($autoTopics, $scopeRecord); }; + + $self->MergeAutoTopics($language, $autoTopics); + }; + + $self->RemoveRemainingHeaderlessTopics(); + + + # We don't need to do this if there aren't any auto-topics because the only package changes would be implied by the comments. + if (defined $autoTopics) + { $self->AddPackageDelineators(); }; + + if (!NaturalDocs::Settings->NoAutoGroup()) + { $self->MakeAutoGroups($autoTopics); }; + + + # Set the menu title. 
+ + my $defaultMenuTitle = $sourceFile; + + if (scalar @parsedFile) + { + my $addFileTitle; + + if (NaturalDocs::Settings->OnlyFileTitles()) + { + # We still want to use the title from the topics if the first one is a file. + if ($parsedFile[0]->Type() eq ::TOPIC_FILE()) + { $addFileTitle = 0; } + else + { $addFileTitle = 1; }; + } + elsif (scalar @parsedFile == 1 || NaturalDocs::Topics->TypeInfo( $parsedFile[0]->Type() )->PageTitleIfFirst()) + { $addFileTitle = 0; } + else + { $addFileTitle = 1; }; + + if (!$addFileTitle) + { + $defaultMenuTitle = $parsedFile[0]->Title(); + } + else + { + # If the title ended up being the file name, add a leading section for it. + + unshift @parsedFile, + NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FILE(), (NaturalDocs::File->SplitPath($sourceFile))[2], + undef, undef, undef, undef, undef, 1, undef); + }; + }; + + NaturalDocs::Error->OnEndParsing($sourceFile); + + return $defaultMenuTitle; + }; + + +# +# Function: CleanComment +# +# Removes any extraneous formatting and whitespace from the comment. Eliminates comment boxes, horizontal lines, trailing +# whitespace from lines, and expands all tab characters. It keeps leading whitespace, though, since it may be needed for +# example code, and blank lines, since the original line numbers are needed. +# +# Parameters: +# +# commentLines - An arrayref of the comment lines to clean. *The original memory will be changed.* Lines should have the +# language's comment symbols replaced by spaces and not have a trailing line break. 
#
sub CleanComment #(commentLines)
    {
    my ($self, $commentLines) = @_;

    use constant DONT_KNOW => 0;
    use constant IS_UNIFORM => 1;
    use constant IS_UNIFORM_IF_AT_END => 2;
    use constant IS_NOT_UNIFORM => 3;

    my $leftSide = DONT_KNOW;
    my $rightSide = DONT_KNOW;
    my $leftSideChar;
    my $rightSideChar;

    my $index = 0;
    my $tabLength = NaturalDocs::Settings->TabLength();

    while ($index < scalar @$commentLines)
        {
        # Strip trailing whitespace from the original.

        $commentLines->[$index] =~ s/[ \t]+$//;


        # Expand tabs in the original.  This method is almost six times faster than Text::Tabs' method.

        my $tabIndex = index($commentLines->[$index], "\t");

        while ($tabIndex != -1)
            {
            substr( $commentLines->[$index], $tabIndex, 1, ' ' x ($tabLength - ($tabIndex % $tabLength)) );
            $tabIndex = index($commentLines->[$index], "\t", $tabIndex);
            };


        # Make a working copy and strip leading whitespace as well.  This has to be done after tabs are expanded because
        # stripping indentation could change how far tabs are expanded.

        my $line = $commentLines->[$index];
        $line =~ s/^ +//;

        # If the line is blank...
        if (!length $line)
            {
            # If we have a potential vertical line, this only acceptable if it's at the end of the comment.
            if ($leftSide == IS_UNIFORM)
                {  $leftSide = IS_UNIFORM_IF_AT_END;  };
            if ($rightSide == IS_UNIFORM)
                {  $rightSide = IS_UNIFORM_IF_AT_END;  };
            }

        # If there's at least four symbols in a row, it's a horizontal line.  The second regex supports differing edge characters.  It
        # doesn't matter if any of this matches the left and right side symbols.  The length < 256 is a sanity check, because that
        # regexp has caused the perl regexp engine to choke on an insane line someone sent me from an automatically generated
        # file.  It had over 10k characters on the first line, and most of them were 0x00.
        elsif ($line =~ /^([^a-zA-Z0-9 ])\1{3,}$/ ||
                (length $line < 256 && $line =~ /^([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$/) )
            {
            # Ignore it.  This has no effect on the vertical line detection.  We want to keep it in the output though in case it was
            # in a code section.
            }

        # If the line is not blank or a horizontal line...
        else
            {
            # More content means any previous blank lines are no longer tolerated in vertical line detection.  They are only
            # acceptable at the end of the comment.

            if ($leftSide == IS_UNIFORM_IF_AT_END)
                {  $leftSide = IS_NOT_UNIFORM;  };
            if ($rightSide == IS_UNIFORM_IF_AT_END)
                {  $rightSide = IS_NOT_UNIFORM;  };


            # Detect vertical lines.  Lines are only lines if they are followed by whitespace or a connected horizontal line.
            # Otherwise we may accidentally detect lines from short comments that just happen to have every first or last
            # character the same.

            if ($leftSide != IS_NOT_UNIFORM)
                {
                if ($line =~ /^([^a-zA-Z0-9])\1*(?: |$)/)
                    {
                    if ($leftSide == DONT_KNOW)
                        {
                        $leftSide = IS_UNIFORM;
                        $leftSideChar = $1;
                        }
                    else  # ($leftSide == IS_UNIFORM)  Other choices already ruled out.
                        {
                        if ($leftSideChar ne $1)
                            {  $leftSide = IS_NOT_UNIFORM;  };
                        };
                    }
                # We'll tolerate the lack of symbols on the left on the first line, because it may be a
                # /* Function: Whatever
                #  * Description.
                #  */
                # comment which would have the leading /* blanked out.
                elsif ($index != 0)
                    {
                    $leftSide = IS_NOT_UNIFORM;
                    };
                };

            if ($rightSide != IS_NOT_UNIFORM)
                {
                if ($line =~ / ([^a-zA-Z0-9])\1*$/)
                    {
                    if ($rightSide == DONT_KNOW)
                        {
                        $rightSide = IS_UNIFORM;
                        $rightSideChar = $1;
                        }
                    else  # ($rightSide == IS_UNIFORM)  Other choices already ruled out.
                        {
                        if ($rightSideChar ne $1)
                            {  $rightSide = IS_NOT_UNIFORM;  };
                        };
                    }
                else
                    {
                    $rightSide = IS_NOT_UNIFORM;
                    };
                };

            # We'll remove vertical lines later if they're uniform throughout the entire comment.
            };

        $index++;
        };


    if ($leftSide == IS_UNIFORM_IF_AT_END)
        {  $leftSide = IS_UNIFORM;  };
    if ($rightSide == IS_UNIFORM_IF_AT_END)
        {  $rightSide = IS_UNIFORM;  };


    $index = 0;
    my $inCodeSection = 0;

    while ($index < scalar @$commentLines)
        {
        # Clear horizontal lines only if we're not in a code section.
        if ($commentLines->[$index] =~ /^ *([^a-zA-Z0-9 ])\1{3,}$/ ||
            ( length $commentLines->[$index] < 256 &&
              $commentLines->[$index] =~ /^ *([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$/ ) )
            {
            if (!$inCodeSection)
                {  $commentLines->[$index] = '';  }
            }

        else
            {
            # Clear vertical lines.

            if ($leftSide == IS_UNIFORM)
                {
                # This works because every line should either start this way, be blank, or be the first line that doesn't start with a
                # symbol.
                $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1*//;
                };

            if ($rightSide == IS_UNIFORM)
                {
                $commentLines->[$index] =~ s/ *([^a-zA-Z0-9 ])\1*$//;
                };


            # Clear horizontal lines again if there were vertical lines.  This catches lines that were separated from the verticals by
            # whitespace.

            if (($leftSide == IS_UNIFORM || $rightSide == IS_UNIFORM) && !$inCodeSection)
                {
                $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1{3,}$//;
                $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$//;
                };


            # Check for the start and end of code sections.  Note that this doesn't affect vertical line removal.

            if (!$inCodeSection &&
                $commentLines->[$index] =~ /^ *\( *(?:(?:start|begin)? +)?(?:table|code|example|diagram) *\)$/i )
                {
                $inCodeSection = 1;
                }
            elsif ($inCodeSection &&
                    $commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i)
                {
                $inCodeSection = 0;
                }
            }


        $index++;
        };

    };



###############################################################################
# Group: Processing Functions


#
#   Function: RepairPackages
#
#   Recalculates the packages for all comment topics using the auto-topics and the scope record.  Call this *before* calling
#   <MergeAutoTopics()>.
#
#   Parameters:
#
#       autoTopics - A reference to the list of automatically generated <NaturalDocs::Parser::ParsedTopics>.
#       scopeRecord - A reference to an array of <NaturalDocs::Languages::Advanced::ScopeChanges>.
#
sub RepairPackages #(autoTopics, scopeRecord)
    {
    my ($self, $autoTopics, $scopeRecord) = @_;

    my $topicIndex = 0;
    my $autoTopicIndex = 0;
    my $scopeIndex = 0;

    my $topic = $parsedFile[0];
    my $autoTopic = $autoTopics->[0];
    my $scopeChange = $scopeRecord->[0];

    my $currentPackage;
    my $inFakePackage;

    while (defined $topic)
        {
        # First update the scope via the record if its defined and has the lowest line number.
        if (defined $scopeChange &&
            $scopeChange->LineNumber() <= $topic->LineNumber() &&
            (!defined $autoTopic || $scopeChange->LineNumber() <= $autoTopic->LineNumber()) )
            {
            $currentPackage = $scopeChange->Scope();
            $scopeIndex++;
            $scopeChange = $scopeRecord->[$scopeIndex];  # Will be undef when past end.
            $inFakePackage = undef;
            }

        # Next try to end a fake scope with an auto topic if its defined and has the lowest line number.
        elsif (defined $autoTopic &&
                $autoTopic->LineNumber() <= $topic->LineNumber())
            {
            if ($inFakePackage)
                {
                $currentPackage = $autoTopic->Package();
                $inFakePackage = undef;
                };

            $autoTopicIndex++;
            $autoTopic = $autoTopics->[$autoTopicIndex];  # Will be undef when past end.
            }


        # Finally try to handle the topic, since it has the lowest line number.  Check for Type() because headerless topics won't have
        # one.
        else
            {
            my $scope;
            if ($topic->Type())
                {  $scope = NaturalDocs::Topics->TypeInfo($topic->Type())->Scope();  }
            else
                {  $scope = ::SCOPE_NORMAL();  };

            if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END())
                {
                # They should already have the correct class and scope.
                $currentPackage = $topic->Package();
                $inFakePackage = 1;
                }
            else
                {
                # Fix the package of everything else.

                # Note that the first function or variable topic to appear in a fake package will assume that package even if it turns out
                # to be incorrect in the actual code, since the topic will come before the auto-topic.  This will be corrected in
                # MergeAutoTopics().

                $topic->SetPackage($currentPackage);
                };

            $topicIndex++;
            $topic = $parsedFile[$topicIndex];  # Will be undef when past end.
            };
        };

    };


#
#   Function: MergeAutoTopics
#
#   Merges the automatically generated topics into the file.  If an auto-topic matches an existing topic, it will have its prototype
#   and package transferred.  If it doesn't, the auto-topic will be inserted into the list unless
#   <NaturalDocs::Settings->DocumentedOnly()> is set.  If an existing topic doesn't have a title, it's assumed to be a headerless
#   comment and will be merged with the next auto-topic or discarded.
#
#   A titled topic only matches an auto-topic when both have the same topic type and the auto-topic's title contains the topic's
#   title (with any trailing parenthetical removed).
#
#   Parameters:
#
#       language  - The <NaturalDocs::Languages::Base>-derived class for the file.
#       autoTopics - A reference to the list of automatically generated topics.
#
sub MergeAutoTopics #(language, autoTopics)
    {
    my ($self, $language, $autoTopics) = @_;

    my $topicIndex = 0;
    my $autoTopicIndex = 0;

    # Keys are topic types, values are existence hashrefs of titles.
    my %topicsInLists;

    while ($topicIndex < scalar @parsedFile && $autoTopicIndex < scalar @$autoTopics)
        {
        my $topic = $parsedFile[$topicIndex];
        my $autoTopic = $autoTopics->[$autoTopicIndex];

        # Strip a trailing parenthetical, such as a parameter list, so "Name (args)" can match the auto-topic "Name".
        my $cleanTitle = $topic->Title();
        $cleanTitle =~ s/[\t ]*\([^\(]*$//;

        # Add the auto-topic if it's higher in the file than the current topic.
        if ($autoTopic->LineNumber() < $topic->LineNumber())
            {
            if (exists $topicsInLists{$autoTopic->Type()} &&
                exists $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()})
                {
                # Remove it from the list so a second one with the same name will be added.
                delete $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()};
                }
            elsif (!NaturalDocs::Settings->DocumentedOnly())
                {
                splice(@parsedFile, $topicIndex, 0, $autoTopic);
                $topicIndex++;
                };

            $autoTopicIndex++;
            }

        # Remove a headerless topic if there's another topic between it and the next auto-topic.
        elsif (!$topic->Title() && $topicIndex + 1 < scalar @parsedFile &&
                $parsedFile[$topicIndex+1]->LineNumber() < $autoTopic->LineNumber())
            {
            splice(@parsedFile, $topicIndex, 1);
            }

        # Transfer information if we have a match or a headerless topic.  Types are compared as strings, like every other type
        # comparison in this file; a numeric == would treat all non-numeric type names as equal.
        elsif ( !$topic->Title() || ($topic->Type() eq $autoTopic->Type() && index($autoTopic->Title(), $cleanTitle) != -1) )
            {
            $topic->SetType($autoTopic->Type());
            $topic->SetPrototype($autoTopic->Prototype());
            $topic->SetUsing($autoTopic->Using());

            if (!$topic->Title())
                {  $topic->SetTitle($autoTopic->Title());  };

            if (NaturalDocs::Topics->TypeInfo($topic->Type())->Scope() != ::SCOPE_START())
                {  $topic->SetPackage($autoTopic->Package());  }
            elsif ($autoTopic->Package() ne $topic->Package())
                {
                my @autoPackageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($autoTopic->Package());
                my @packageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($topic->Package());

                # Drop the trailing identifiers the two packages have in common.
                while (scalar @autoPackageIdentifiers && $autoPackageIdentifiers[-1] eq $packageIdentifiers[-1])
                    {
                    pop @autoPackageIdentifiers;
                    pop @packageIdentifiers;
                    };

                if (scalar @autoPackageIdentifiers)
                    {  $topic->SetPackage( NaturalDocs::SymbolString->Join(@autoPackageIdentifiers) );  };
                };

            $topicIndex++;
            $autoTopicIndex++;
            }

        # Extract topics in lists.
        elsif ($topic->IsList())
            {
            if (!exists $topicsInLists{$topic->Type()})
                {  $topicsInLists{$topic->Type()} = { };  };

            my $body = $topic->Body();

            while ($body =~ /<ds>([^<]+)<\/ds>/g)
                {  $topicsInLists{$topic->Type()}->{NaturalDocs::NDMarkup->RestoreAmpChars($1)} = 1;  };

            $topicIndex++;
            }

        # Otherwise there's no match.  Skip the topic.  The auto-topic will be added later.
        else
            {
            $topicIndex++;
            }
        };

    # Add any auto-topics remaining.
    if (!NaturalDocs::Settings->DocumentedOnly())
        {
        while ($autoTopicIndex < scalar @$autoTopics)
            {
            my $autoTopic = $autoTopics->[$autoTopicIndex];

            if (exists $topicsInLists{$autoTopic->Type()} &&
                exists $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()})
                {
                # Remove it from the list so a second one with the same name will be added.
                delete $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()};
                }
            else
                {
                push(@parsedFile, $autoTopic);
                };

            $autoTopicIndex++;
            };
        };
    };


#
#   Function: RemoveRemainingHeaderlessTopics
#
#   After <MergeAutoTopics()> is done, this function removes any remaining headerless topics from the file.  If they don't merge
#   into anything, they're not valid topics.
#
sub RemoveRemainingHeaderlessTopics
    {
    my ($self) = @_;

    my $index = 0;
    while ($index < scalar @parsedFile)
        {
        if ($parsedFile[$index]->Title())
            {  $index++;  }
        else
            {  splice(@parsedFile, $index, 1);  };
        };
    };


#
#   Function: MakeAutoGroups
#
#   Creates group topics for files that do not have them.
#
sub MakeAutoGroups
    {
    my ($self) = @_;

    # No groups only one topic.
    if (scalar @parsedFile < 2)
        {  return;  };

    my $index = 0;
    my $startStretch = 0;

    # Skip the first entry if its the page title.
    if (NaturalDocs::Topics->TypeInfo( $parsedFile[0]->Type() )->PageTitleIfFirst())
        {
        $index = 1;
        $startStretch = 1;
        };

    # Make auto-groups for each stretch between scope-altering topics.
    while ($index < scalar @parsedFile)
        {
        my $scope = NaturalDocs::Topics->TypeInfo($parsedFile[$index]->Type())->Scope();

        if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END())
            {
            if ($index > $startStretch)
                {  $index += $self->MakeAutoGroupsFor($startStretch, $index);  };

            $startStretch = $index + 1;
            };

        $index++;
        };

    if ($index > $startStretch)
        {  $self->MakeAutoGroupsFor($startStretch, $index);  };
    };


#
#   Function: MakeAutoGroupsFor
#
#   Creates group topics for sections of files that do not have them.  A support function for <MakeAutoGroups()>.
#
#   Parameters:
#
#       startIndex - The index to start at.
#       endIndex - The index to end at.  Not inclusive.
#
#   Returns:
#
#       The number of group topics added.
#
sub MakeAutoGroupsFor #(startIndex, endIndex)
    {
    my ($self, $startIndex, $endIndex) = @_;

    # No groups if any are defined already.
    for (my $i = $startIndex; $i < $endIndex; $i++)
        {
        if ($parsedFile[$i]->Type() eq ::TOPIC_GROUP())
            {  return 0;  };
        };


    use constant COUNT => 0;
    use constant TYPE => 1;
    use constant SECOND_TYPE => 2;
    use constant SIZE => 3;

    # This is an array of ( count, type, secondType ) triples.  Count and Type will always be filled in; count is the number of
    # consecutive topics of type.  On the second pass, if small groups are combined secondType will be filled in.  There will not be
    # more than two types per group.
    my @groups;
    my $groupIndex = 0;


    # First pass: Determine all the groups.

    my $i = $startIndex;
    my $currentType;

    while ($i < $endIndex)
        {
        if (!defined $currentType || ($parsedFile[$i]->Type() ne $currentType && $parsedFile[$i]->Type() ne ::TOPIC_GENERIC()) )
            {
            if (defined $currentType)
                {  $groupIndex += SIZE;  };

            $currentType = $parsedFile[$i]->Type();

            $groups[$groupIndex + COUNT] = 1;
            $groups[$groupIndex + TYPE] = $currentType;
            }
        else
            {  $groups[$groupIndex + COUNT]++;  };

        $i++;
        };


    # Second pass: Combine groups based on "noise".  Noise means types go from A to B to A at least once, and there are at least
    # two groups in a row with three or less, and at least one of those groups is two or less.  So 3, 3, 3 doesn't count as noise, but
    # 3, 2, 3 does.

    $groupIndex = 0;

    # While there are at least three groups left...
    while ($groupIndex < scalar @groups - (2 * SIZE))
        {
        # If the group two places in front of this one has the same type...
        if ($groups[$groupIndex + (2 * SIZE) + TYPE] eq $groups[$groupIndex + TYPE])
            {
            # It means we went from A to B to A, which partially qualifies as noise.

            my $firstType = $groups[$groupIndex + TYPE];
            my $secondType = $groups[$groupIndex + SIZE + TYPE];

            if (NaturalDocs::Topics->TypeInfo($firstType)->CanGroupWith($secondType) ||
                NaturalDocs::Topics->TypeInfo($secondType)->CanGroupWith($firstType))
                {
                my $hasNoise;

                my $hasThrees;
                my $hasTwosOrOnes;

                my $endIndex = $groupIndex;

                while ($endIndex < scalar @groups &&
                         ($groups[$endIndex + TYPE] eq $firstType || $groups[$endIndex + TYPE] eq $secondType))
                    {
                    if ($groups[$endIndex + COUNT] > 3)
                        {
                        # They must be consecutive to count.
                        $hasThrees = 0;
                        $hasTwosOrOnes = 0;
                        }
                    elsif ($groups[$endIndex + COUNT] == 3)
                        {
                        $hasThrees = 1;

                        if ($hasTwosOrOnes)
                            {  $hasNoise = 1;  };
                        }
                    else # < 3
                        {
                        if ($hasThrees || $hasTwosOrOnes)
                            {  $hasNoise = 1;  };

                        $hasTwosOrOnes = 1;
                        };

                    $endIndex += SIZE;
                    };

                if (!$hasNoise)
                    {
                    $groupIndex = $endIndex - SIZE;
                    }
                else # hasNoise
                    {
                    $groups[$groupIndex + SECOND_TYPE] = $secondType;

                    for (my $noiseIndex = $groupIndex + SIZE; $noiseIndex < $endIndex; $noiseIndex += SIZE)
                        {
                        $groups[$groupIndex + COUNT] += $groups[$noiseIndex + COUNT];
                        };

                    splice(@groups, $groupIndex + SIZE, $endIndex - $groupIndex - SIZE);

                    $groupIndex += SIZE;
                    };
                }

            else # They can't group together
                {
                $groupIndex += SIZE;
                };
            }

        else
            {  $groupIndex += SIZE;  };
        };


    # Finally, create group topics for the parsed file.

    $groupIndex = 0;
    $i = $startIndex;

    # Counts the topics actually spliced into @parsedFile.  This is what the caller needs to keep its own index in step.  It can't
    # be derived from the size of @groups: the last triple usually has no secondType slot, so the integer division by SIZE comes
    # up short, and generic groups occupy a triple without getting a group topic.
    my $groupsAdded = 0;

    while ($groupIndex < scalar @groups)
        {
        if ($groups[$groupIndex + TYPE] ne ::TOPIC_GENERIC())
            {
            my $topic = $parsedFile[$i];
            my $title = NaturalDocs::Topics->NameOfType($groups[$groupIndex + TYPE], 1);

            if (defined $groups[$groupIndex + SECOND_TYPE])
                {  $title .= ' and ' . NaturalDocs::Topics->NameOfType($groups[$groupIndex + SECOND_TYPE], 1);  };

            splice(@parsedFile, $i, 0, NaturalDocs::Parser::ParsedTopic->New(::TOPIC_GROUP(),
                                                                                                            $title,
                                                                                                            $topic->Package(), $topic->Using(),
                                                                                                            undef, undef, undef,
                                                                                                            $topic->LineNumber()) );
            $i++;
            $groupsAdded++;
            };

        $i += $groups[$groupIndex + COUNT];
        $groupIndex += SIZE;
        };

    return $groupsAdded;
    };


#
#   Function: AddToClassHierarchy
#
#   Adds any class topics to the class hierarchy, since they may not have been called with <OnClass()> if they didn't match up to
#   an auto-topic.
#
sub AddToClassHierarchy
    {
    my ($self) = @_;

    foreach my $topic (@parsedFile)
        {
        if ($topic->Type() && NaturalDocs::Topics->TypeInfo( $topic->Type() )->ClassHierarchy())
            {
            if ($topic->IsList())
                {
                my $body = $topic->Body();

                while ($body =~ /<ds>([^<]+)<\/ds>/g)
                    {
                    $self->OnClass( NaturalDocs::SymbolString->FromText( NaturalDocs::NDMarkup->RestoreAmpChars($1) ) );
                    };
                }
            else
                {
                $self->OnClass($topic->Package());
                };
            };
        };
    };


#
#   Function: AddPackageDelineators
#
#   Adds section and class topics to make sure the package is correctly represented in the documentation.  Should be called last in
#   this process.
#
sub AddPackageDelineators
    {
    my ($self) = @_;

    # Walks @parsedFile in order.  Whenever a topic's package differs from the previous topic's package:
    #
    #   - If the topic's type has scope SCOPE_START, its title and type are remembered for that package.
    #   - If the topic's type has scope SCOPE_END, a new topic is inserted in front of it:
    #       - a 'Global' section when the new package is undefined,
    #       - a '(continued)' topic reusing the remembered title and type when the package was seen before,
    #       - otherwise a class topic titled with the package's identifiers joined by the language's package separator.
    #
    # Takes no parameters besides the invocant and modifies @parsedFile in place.

    my $index = 0;
    my $currentPackage;

    # Values are the arrayref [ title, type ];
    my %usedPackages;

    while ($index < scalar @parsedFile)
        {
        my $topic = $parsedFile[$index];

        if ($topic->Package() ne $currentPackage)
            {
            $currentPackage = $topic->Package();
            my $scopeType = NaturalDocs::Topics->TypeInfo($topic->Type())->Scope();

            if ($scopeType == ::SCOPE_START())
                {
                $usedPackages{$currentPackage} = [ $topic->Title(), $topic->Type() ];
                }
            elsif ($scopeType == ::SCOPE_END())
                {
                my $newTopic;

                if (!defined $currentPackage)
                    {
                    $newTopic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_SECTION(), 'Global',
                                                                      undef, undef,
                                                                      undef, undef, undef,
                                                                      $topic->LineNumber(), undef);
                    }
                else
                    {
                    my ($title, $body, $summary, $type);
                    my @packageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($currentPackage);

                    if (exists $usedPackages{$currentPackage})
                        {
                        $title = $usedPackages{$currentPackage}->[0];
                        $type = $usedPackages{$currentPackage}->[1];
                        $body = '<p>(continued)</p>';
                        $summary = '(continued)';
                        }
                    else
                        {
                        $title = join($language->PackageSeparator(), @packageIdentifiers);
                        $type = ::TOPIC_CLASS();

                        # Body and summary stay undef.

                        # Store the same [ title, type ] shape the other branches read.  Storing the bare title here would make
                        # the ->[0] / ->[1] lookups above die under strict refs the next time this package comes up.
                        $usedPackages{$currentPackage} = [ $title, $type ];
                        };

                    # The new topic's package is the current package minus the trailing identifiers that make up its title.
                    my @titleIdentifiers = NaturalDocs::SymbolString->IdentifiersOf( NaturalDocs::SymbolString->FromText($title) );
                    foreach my $titleIdentifier (@titleIdentifiers)
                        {  pop @packageIdentifiers;  };

                    $newTopic = NaturalDocs::Parser::ParsedTopic->New($type, $title,
                                                                      NaturalDocs::SymbolString->Join(@packageIdentifiers), undef,
                                                                      undef, $summary, $body,
                                                                      $topic->LineNumber(), undef);
                    }

                # Insert in front of the current topic and step past the insertion so the same topic isn't examined twice.
                splice(@parsedFile, $index, 0, $newTopic);
                $index++;
                }
            };

        $index++;
        };
    };


#
#   Function: BreakLists
#
#   Breaks list topics into individual topics.
#
sub BreakLists
    {
    my $self = shift;

    # Walks @parsedFile.  Every list topic whose type asks for list breaking is replaced by one topic per <ds>/<dd> entry found
    # inside its <dl> lists.  Whatever body text lies outside the lists is kept, if non-empty, as a leading group topic carrying
    # the original title.  Takes no parameters besides the invocant and modifies @parsedFile in place.

    my $index = 0;

    while ($index < scalar @parsedFile)
        {
        my $topic = $parsedFile[$index];

        if ($topic->IsList() && NaturalDocs::Topics->TypeInfo( $topic->Type() )->BreakLists())
            {
            my $body = $topic->Body();

            my @newTopics;
            my $newBody;

            my $bodyIndex = 0;

            for (;;)
                {
                my $startList = index($body, '<dl>', $bodyIndex);

                if ($startList == -1)
                    {  last;  };

                $newBody .= substr($body, $bodyIndex, $startList - $bodyIndex);

                my $endList = index($body, '</dl>', $startList);
                my $listBody;

                if ($endList == -1)
                    {
                    # Unterminated list.  Treat it as running to the end of the body.  Without this, $bodyIndex would be reset to
                    # 4 and the same <dl> could be found again on every pass, looping forever.
                    $listBody = substr($body, $startList);
                    $bodyIndex = length($body);
                    }
                else
                    {
                    $listBody = substr($body, $startList, $endList - $startList);
                    $bodyIndex = $endList + length('</dl>');
                    };

                while ($listBody =~ /<ds>([^<]+)<\/ds><dd>(.*?)<\/dd>/g)
                    {
                    # Copy the captures right away; the summary call below runs its own regex.
                    my ($symbol, $description) = ($1, $2);

                    push @newTopics, NaturalDocs::Parser::ParsedTopic->New( $topic->Type(), $symbol, $topic->Package(),
                                                                            $topic->Using(), undef,
                                                                            $self->GetSummaryFromDescriptionList($description),
                                                                            '<p>' . $description . '</p>', $topic->LineNumber(),
                                                                            undef );
                    };
                };

            $newBody .= substr($body, $bodyIndex);

            # Remove trailing headings.
            $newBody =~ s/(?:<h>[^<]+<\/h>)+$//;

            # Remove empty headings.
            $newBody =~ s/(?:<h>[^<]+<\/h>)+(<h>[^<]+<\/h>)/$1/g;

            if ($newBody)
                {
                unshift @newTopics, NaturalDocs::Parser::ParsedTopic->New( ::TOPIC_GROUP(), $topic->Title(), $topic->Package(),
                                                                           $topic->Using(), undef,
                                                                           $self->GetSummaryFromBody($newBody), $newBody,
                                                                           $topic->LineNumber(), undef );
                };

            # Replace the list topic with its pieces and continue after them.
            splice(@parsedFile, $index, 1, @newTopics);

            $index += scalar @newTopics;
            }

        else # not a list
            {  $index++;  };
        };
    };


#
#   Function: GetSummaryFromBody
#
#   Returns the summary text from the topic body.
#
#   Parameters:
#
#       body - The complete topic body, in <NDMarkup>.
#
#   Returns:
#
#       The topic summary, or undef if none.
#
sub GetSummaryFromBody #(body)
    {
    my ($self, $body) = @_;

    my $summary;

    # Extract the first sentence from the leading paragraph, if any.  We'll tolerate a single header beforehand, but nothing else.
    #
    # A sentence ends at . ! or ? followed by a closing character or a space.  The body is entity-encoded markup, so a closing
    # quote or angle bracket presumably arrives as &quot; or &gt; (TODO confirm against the markup encoder).  Both the entity
    # and the literal forms are accepted.

    if ($body =~ /^(?:<h>[^<]*<\/h>)?<p>(.*?)(<\/p>|[\.\!\?](?:[\)\}\'\ ]|"|>|&quot;|&gt;))/x)
        {
        $summary = $1;

        # Keep the sentence terminator, but not the paragraph's closing tag.
        if ($2 ne '</p>')
            {  $summary .= $2;  };
        };

    return $summary;
    };


#
#   Function: GetSummaryFromDescriptionList
#
#   Returns the summary text from a description list entry.
#
#   Parameters:
#
#       description - The description in <NDMarkup>.  Should be the content between the <dd></dd> tags only.
#
#   Returns:
#
#       The description summary, or undef if none.
#
sub GetSummaryFromDescriptionList #(description)
    {
    my ($self, $description) = @_;

    my $summary;

    # The first sentence, or the whole description if no sentence end is found.  Sentence ends are recognised the same way as in
    # GetSummaryFromBody, accepting both the entity and the literal forms of a closing quote or angle bracket.

    if ($description =~ /^(.*?)($|[\.\!\?](?:[\)\}\'\ ]|"|>|&quot;|&gt;))/)
        {  $summary = $1 . $2;  };

    return $summary;
    };


1;