diff options
Diffstat (limited to 'docs/tool/Modules/NaturalDocs/Languages/Advanced.pm')
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Advanced.pm | 828 |
1 files changed, 828 insertions, 0 deletions
diff --git a/docs/tool/Modules/NaturalDocs/Languages/Advanced.pm b/docs/tool/Modules/NaturalDocs/Languages/Advanced.pm new file mode 100644 index 00000000..8ae27bfc --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Advanced.pm @@ -0,0 +1,828 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Advanced +# +############################################################################### +# +# The base class for all languages that have full support in Natural Docs. Each one will have a custom parser capable +# of documenting undocumented aspects of the code. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +use NaturalDocs::Languages::Advanced::Scope; +use NaturalDocs::Languages::Advanced::ScopeChange; + +package NaturalDocs::Languages::Advanced; + +use base 'NaturalDocs::Languages::Base'; + + +############################################################################# +# Group: Implementation + +# +# Constants: Members +# +# The class is implemented as a blessed arrayref. The following constants are used as indexes. +# +# TOKENS - An arrayref of tokens used in all the <Parsing Functions>. +# SCOPE_STACK - An arrayref of <NaturalDocs::Languages::Advanced::Scope> objects serving as a scope stack for parsing. +# There will always be one available, with a symbol of undef, for the top level. +# SCOPE_RECORD - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChange> objects, as generated by the scope +# stack. If there is more than one change per line, only the last is stored. +# AUTO_TOPICS - An arrayref of <NaturalDocs::Parser::ParsedTopics> generated automatically from the code. 
#
use NaturalDocs::DefineMembers 'TOKENS', 'SCOPE_STACK', 'SCOPE_RECORD', 'AUTO_TOPICS';


#############################################################################
# Group: Functions

#
#   Function: New
#
#   Creates and returns a new object.  All constructor arguments are handed to the parent class unchanged.
#
#   Parameters:
#
#       name - The name of the language.
#
sub New #(name)
    {
    my $package = shift;

    my $object = $package->SUPER::New(@_);

    # The parsing members start out undefined.
    foreach my $member (TOKENS, SCOPE_STACK, SCOPE_RECORD)
        {  $object->[$member] = undef;  };

    return $object;
    };


# Function: Tokens
# Returns the arrayref of tokens currently stored in the object, which is what <ParseForCommentsAndTokens()> fills in.
sub Tokens
    {
    my ($self) = @_;
    return $self->[TOKENS];
    };

# Function: SetTokens
# Replaces the stored token arrayref with the passed one.
sub SetTokens #(tokens)
    {
    my ($self, $tokens) = @_;
    $self->[TOKENS] = $tokens;
    };

# Function: ClearTokens
# Drops the stored token list.  You may want to do this after parsing is over to save memory.
sub ClearTokens
    {
    my ($self) = @_;
    $self->[TOKENS] = undef;
    };

# Function: AutoTopics
# Returns the arrayref of automatically generated topics, or undef if there aren't any.
sub AutoTopics
    {
    my ($self) = @_;
    return $self->[AUTO_TOPICS];
    };

# Function: AddAutoTopic
# Appends a <NaturalDocs::Parser::ParsedTopic> to <AutoTopics()>, creating the list on first use.
sub AddAutoTopic #(topic)
    {
    my ($self, $topic) = @_;

    $self->[AUTO_TOPICS] = [ ] unless (defined $self->[AUTO_TOPICS]);

    push @{$self->[AUTO_TOPICS]}, $topic;
    };

# Function: ClearAutoTopics
# Resets the automatic topic list.  Not necessary if you call <ParseForCommentsAndTokens()>.
sub ClearAutoTopics
    {
    my ($self) = @_;
    $self->[AUTO_TOPICS] = undef;
    };

# Function: ScopeRecord
# Returns an arrayref of <NaturalDocs::Languages::Advanced::ScopeChange> objects describing how and when the scope
# changed throughout the file.  There will always be at least one entry, which will be for line 1 and undef as the scope.
sub ScopeRecord
    {
    my ($self) = @_;
    return $self->[SCOPE_RECORD];
    };



###############################################################################
#
#   Group: Parsing Functions
#
#   These functions are good general language building blocks.  Use them to create your language-specific parser.
#
#   All functions work on <Tokens()> and assume it is set by <ParseForCommentsAndTokens()>.
#


#
#   Function: ParseForCommentsAndTokens
#
#   Loads the passed file, sends all appropriate comments to <NaturalDocs::Parser->OnComment()>, and breaks the rest into
#   an arrayref of tokens.  Tokens are defined as
#
#   - All consecutive alphanumeric and underscore characters.
#   - All consecutive whitespace.
#   - A single line break.  It will always be "\n"; you don't have to worry about platform differences.
#   - A single character not included above, which is usually a symbol.  Multiple consecutive ones each get their own token.
#
#   The result will be placed in <Tokens()>.
#
#   Parameters:
#
#       sourceFile - The source <FileName> to load and parse.
#       lineCommentSymbols - An arrayref of symbols that designate line comments, or undef if none.
#       blockCommentSymbols - An arrayref of symbol pairs that designate multiline comments, or undef if none.  Symbol pairs are
#                                            designated as two consecutive array entries, the opening symbol appearing first.
#       javadocLineCommentSymbols - An arrayref of symbols that designate the start of a JavaDoc comment, or undef if none.
#       javadocBlockCommentSymbols - An arrayref of symbol pairs that designate multiline JavaDoc comments, or undef if none.
#
#   Notes:
#
#       - This function automatically calls <ClearAutoTopics()> and <ClearScopeStack()>.  You only need to call those functions
#         manually if you override this one.
#       - To save parsing time, all comment lines sent to <NaturalDocs::Parser->OnComment()> will be replaced with blank lines
#         in <Tokens()>.  It's all the same to most languages.
#
sub ParseForCommentsAndTokens #(FileName sourceFile, string[] lineCommentSymbols, string[] blockCommentSymbols, string[] javadocLineCommentSymbols, string[] javadocBlockCommentSymbols)
    {
    my ($self, $sourceFile, $lineCommentSymbols, $blockCommentSymbols,
           $javadocLineCommentSymbols, $javadocBlockCommentSymbols) = @_;

    # Three-argument open with a lexical handle.  The two-argument form interprets characters in the file name itself
    # (leading "&", surrounding whitespace) and a bareword handle is a package global.
    open(my $sourceFileHandle, '<', $sourceFile)
        or die "Couldn't open input file " . $sourceFile . "\n";

    my $tokens = [ ];
    $self->SetTokens($tokens);

    # For convenience.
    $self->ClearAutoTopics();
    $self->ClearScopeStack();


    # Load and preprocess the file

    my @lines;
    my $line = <$sourceFileHandle>;

    # On the very first line, remove a Unicode BOM if present.  Information on it available at:
    # http://www.unicode.org/faq/utf_bom.html#BOM
    # An empty file yields undef here, so there is nothing to strip in that case.
    if (defined $line)
        {  $line =~ s/^\xEF\xBB\xBF//;  };

    while (defined $line)
        {
        ::XChomp(\$line);
        push @lines, $line;

        $line = <$sourceFileHandle>;
        };

    close($sourceFileHandle);

    $self->PreprocessFile(\@lines);


    # Go through the file

    my $lineIndex = 0;

    while ($lineIndex < scalar @lines)
        {
        $line = $lines[$lineIndex];

        # These are redeclared on every pass, so each line group starts with a clean slate.
        my @commentLines;
        my $commentLineNumber;
        my $isJavaDoc;
        my $closingSymbol;


        # Retrieve single line comments.  This leaves $lineIndex at the next line.

        if ( ($isJavaDoc = $self->StripOpeningJavaDocSymbols(\$line, $javadocLineCommentSymbols)) ||
              $self->StripOpeningSymbols(\$line, $lineCommentSymbols))
            {
            $commentLineNumber = $lineIndex + 1;

            # Collect this line and every directly following line that opens with a line comment symbol.  Each one is
            # replaced by a blank line in the token stream.
            for (;;)
                {
                push @commentLines, $line;
                push @$tokens, "\n";

                $lineIndex++;

                if ($lineIndex >= scalar @lines)
                    {  last;  };

                $line = $lines[$lineIndex];

                if (!$self->StripOpeningSymbols(\$line, $lineCommentSymbols))
                    {  last;  };
                };
            }


        # Retrieve multiline comments.  This leaves $lineIndex at the next line.

        elsif ( ($isJavaDoc = $self->StripOpeningJavaDocBlockSymbols(\$line, $javadocBlockCommentSymbols)) ||
                 ($closingSymbol = $self->StripOpeningBlockSymbols(\$line, $blockCommentSymbols)) )
            {
            $commentLineNumber = $lineIndex + 1;

            # For JavaDoc block comments the value returned above is used as the closing symbol.
            if ($isJavaDoc)
                {  $closingSymbol = $isJavaDoc;  };

            # Note that it is possible for a multiline comment to start correctly but not end so.  We want those comments to stay in
            # the code.  For example, look at this prototype with this splint annotation:
            #
            # int get_array(integer_t id,
            #                    /*@out@*/ array_t array);
            #
            # The annotation starts correctly but doesn't end so because it is followed by code on the same line.

            my ($lineRemainder, $isMultiLine);

            for (;;)
                {
                $lineRemainder = $self->StripClosingSymbol(\$line, $closingSymbol);

                push @commentLines, $line;

                # If we found an end comment symbol...
                if (defined $lineRemainder)
                    {  last;  };

                push @$tokens, "\n";
                $lineIndex++;
                $isMultiLine = 1;

                if ($lineIndex >= scalar @lines)
                    {  last;  };

                $line = $lines[$lineIndex];
                };

            # $lineRemainder is undef when the file ended before the comment was closed.  That case is treated like a
            # comment with nothing after its closing symbol.
            if (defined $lineRemainder && $lineRemainder !~ /^[ \t]*$/)
                {
                # If there was something past the closing symbol this wasn't an acceptable comment.

                if ($isMultiLine)
                    {  $self->TokenizeLine($lineRemainder);  }
                else
                    {
                    # We go back to the original line if it wasn't a multiline comment because we want the comment to stay in the
                    # code.  Otherwise the /*@out@*/ from the example would be removed.
                    $self->TokenizeLine($lines[$lineIndex]);
                    };

                @commentLines = ( );
                }
            else
                {
                push @$tokens, "\n";
                };

            $lineIndex++;
            }


        # Otherwise just add it to the code.

        else
            {
            $self->TokenizeLine($line);
            $lineIndex++;
            };


        # If there were comments, send them to Parser->OnComment().

        if (scalar @commentLines)
            {
            NaturalDocs::Parser->OnComment(\@commentLines, $commentLineNumber, $isJavaDoc);
            };

        # $lineIndex was incremented by the individual code paths above.

        };  # while ($lineIndex < scalar @lines)
    };


#
#   Function: PreprocessFile
#
#   An overridable function if you'd like to preprocess the file before it goes into <ParseForCommentsAndTokens()>.  The
#   default implementation does nothing.
#
#   Parameters:
#
#       lines - An arrayref to the file's lines.  Each line has its line break stripped off, but is otherwise untouched.
#
sub PreprocessFile #(lines)
    {
    };


#
#   Function: TokenizeLine
#
#   Converts the passed line to tokens as described in <ParseForCommentsAndTokens> and adds them to <Tokens()>.  Also
#   adds a line break token after it.
#
sub TokenizeLine #(line)
    {
    my ($self, $line) = @_;

    # Word runs, space/tab runs, or any other single character, followed by the line break token.
    push @{$self->Tokens()}, $line =~ /(\w+|[ \t]+|.)/g, "\n";
    };


#
#   Function: TryToSkipString
#
#   If the position is on a string delimiter, moves the position to the token following the closing delimiter, or past the end of the
#   tokens if there is none.  Assumes all other characters are allowed in the string, the delimiter itself is allowed if it's preceded by
#   a backslash, and line breaks are allowed in the string.
#
#   Parameters:
#
#       indexRef - A reference to the position's index into <Tokens()>.
#       lineNumberRef - A reference to the position's line number.
#       openingDelimiter - The opening string delimiter, such as a quote or an apostrophe.
#       closingDelimiter - The closing string delimiter, if different.  If not defined, assumes the same as openingDelimiter.
#       startContentIndexRef - A reference to a variable in which to store the index of the first token of the string's content.
#                                         May be undef.
#       endContentIndexRef - A reference to a variable in which to store the index of the end of the string's content, which is one
#                                        past the last index of content.  May be undef.
#
#   Returns:
#
#       Whether the position was on the passed delimiter or not.  The index, line number, and content index ref variables will be
#       updated only if true.
#
sub TryToSkipString #(indexRef, lineNumberRef, openingDelimiter, closingDelimiter, startContentIndexRef, endContentIndexRef)
    {
    my ($self, $index, $lineNumber, $openingDelimiter, $closingDelimiter, $startContentIndexRef, $endContentIndexRef) = @_;
    my $tokens = $self->Tokens();

    if (!defined $closingDelimiter)
        {  $closingDelimiter = $openingDelimiter;  };

    # Nothing is touched unless the position really is on the opening delimiter.
    if ($tokens->[$$index] ne $openingDelimiter)
        {  return undef;  };


    $$index++;
    if (defined $startContentIndexRef)
        {  $$startContentIndexRef = $$index;  };

    # Tracked explicitly because the index alone can't tell "closed on the very last token" apart from "never closed".
    my $foundClosingDelimiter;

    while ($$index < scalar @$tokens)
        {
        my $token = $tokens->[$$index];

        if ($token eq "\\")
            {
            # Skip the backslash and the token after it.  If the escaped token is a line break it still has to be counted,
            # otherwise the line number drifts for the rest of the file.
            if ($$index + 1 < scalar @$tokens && $tokens->[$$index + 1] eq "\n")
                {  $$lineNumber++;  };

            $$index += 2;
            }
        elsif ($token eq "\n")
            {
            $$lineNumber++;
            $$index++;
            }
        elsif ($token eq $closingDelimiter)
            {
            if (defined $endContentIndexRef)
                {  $$endContentIndexRef = $$index;  };

            $$index++;
            $foundClosingDelimiter = 1;
            last;
            }
        else
            {
            $$index++;
            };
        };

    if (!$foundClosingDelimiter)
        {
        # A trailing backslash can push the index two past the last token.  Settle on exactly one past the end.
        if ($$index > scalar @$tokens)
            {  $$index = scalar @$tokens;  };

        # The string never closed, so its content runs to the end of the tokens.
        if (defined $endContentIndexRef)
            {  $$endContentIndexRef = scalar @$tokens;  };
        };

    return 1;
    };


#
#   Function: SkipRestOfLine
#
#   Moves the position to the token following the next line break, or past the end of the tokens array if there is none.  Useful for
#   line comments.
#
#   Note that it skips blindly.  It assumes there cannot be anything of interest, such as a string delimiter, between the position
#   and the end of the line.
#
#   Parameters:
#
#       indexRef - A reference to the position's index into <Tokens()>.
#       lineNumberRef - A reference to the position's line number.

sub SkipRestOfLine #(indexRef, lineNumberRef)
    {
    my ($self, $index, $lineNumber) = @_;
    my $tokens = $self->Tokens();

    while ($$index < scalar @$tokens)
        {
        my $isLineBreak = ($tokens->[$$index] eq "\n");

        $$index++;

        if ($isLineBreak)
            {
            $$lineNumber++;
            last;
            };
        };
    };


#
#   Function: SkipUntilAfter
#
#   Moves the position to the token following the next occurrence of a particular token sequence, or past the end of the tokens
#   array if it never occurs.  Useful for multiline comments.
#
#   Note that it skips blindly.  It assumes there cannot be anything of interest, such as a string delimiter, between the position
#   and the end of the token sequence.
#
#   Line breaks are counted in the line number both while skipping and when they are part of the matched sequence itself.
#
#   Parameters:
#
#       indexRef - A reference to the position's index.
#       lineNumberRef - A reference to the position's line number.
#       token - A token that must be matched.  Can be specified multiple times to match a sequence of tokens.
#
sub SkipUntilAfter #(indexRef, lineNumberRef, token, token, ...)
    {
    my ($self, $index, $lineNumber, @target) = @_;
    my $tokens = $self->Tokens();

    while ($$index < scalar @$tokens)
        {
        # Only bother comparing the whole sequence if the first token matches and the sequence still fits.
        if ($tokens->[$$index] eq $target[0] && ($$index + scalar @target) <= scalar @$tokens)
            {
            my $match = 1;

            for (my $i = 1; $i < scalar @target; $i++)
                {
                if ($tokens->[$$index+$i] ne $target[$i])
                    {
                    $match = 0;
                    last;
                    };
                };

            if ($match)
                {
                # The position jumps over the whole sequence, so any line breaks inside it have to be counted here.
                foreach my $targetToken (@target)
                    {
                    if ($targetToken eq "\n")
                        {  $$lineNumber++;  };
                    };

                $$index += scalar @target;
                return;
                };
            };

        if ($tokens->[$$index] eq "\n")
            {  $$lineNumber++;  };

        $$index++;
        };
    };


#
#   Function: IsFirstLineToken
#
#   Returns whether the position is at the first token of a line, not including whitespace.
#
#   Parameters:
#
#       index - The index of the position.
#
sub IsFirstLineToken #(index)
    {
    my ($self, $index) = @_;
    my $tokens = $self->Tokens();

    if ($index == 0)
        {  return 1;  };

    # Step back to the previous token, and over one whitespace token if that's what it is.
    $index--;

    if ($tokens->[$index] =~ /^[ \t]/)
        {  $index--;  };

    # It's first only if we ran off the front of the tokens or hit a line break.  Landing on index zero isn't enough by
    # itself, since that token may be actual content on the same line.
    if ($index < 0 || $tokens->[$index] eq "\n")
        {  return 1;  }
    else
        {  return undef;  };
    };


#
#   Function: IsLastLineToken
#
#   Returns whether the position is at the last token of a line, not including whitespace.
#
#   Parameters:
#
#       index - The index of the position.
#
sub IsLastLineToken #(index)
    {
    my ($self, $index) = @_;
    my $tokens = $self->Tokens();

    # Move to the next token, then past any whitespace tokens after it.
    do
        {  $index++;  }
    while ($index < scalar @$tokens && $tokens->[$index] =~ /^[ \t]/);

    if ($index >= scalar @$tokens || $tokens->[$index] eq "\n")
        {  return 1;  }
    else
        {  return undef;  };
    };


#
#   Function: IsAtSequence
#
#   Returns whether the position is at a sequence of tokens.
#
#   Parameters:
#
#       index - The index of the position.
#       token - A token to match.  Specify multiple times to specify the sequence.
#
sub IsAtSequence #(index, token, token, token ...)
    {
    my ($self, $index, @target) = @_;
    my $tokens = $self->Tokens();

    # The sequence can't match if it would run past the end of the tokens.
    if ($index + scalar @target > scalar @$tokens)
        {  return undef;  };

    for (my $i = 0; $i < scalar @target; $i++)
        {
        if ($tokens->[$index + $i] ne $target[$i])
            {  return undef;  };
        };

    return 1;
    };


#
#   Function: IsBackslashed
#
#   Returns whether the position is after a backslash.  Only the single token directly before the position is checked.
#
#   Parameters:
#
#       index - The index of the position.
#
sub IsBackslashed #(index)
    {
    my ($self, $index) = @_;
    my $tokens = $self->Tokens();

    if ($index > 0 && $tokens->[$index - 1] eq "\\")
        {  return 1;  }
    else
        {  return undef;  };
    };



###############################################################################
#
#   Group: Scope Functions
#
#   These functions provide a nice scope stack implementation for language-specific parsers to use.  The default implementation
#   makes the following assumptions.
#
#   - Packages completely replace one another, rather than concatenating.  You need to concatenate manually if that's the
#     behavior.
#
#   - Packages inherit, so if a scope level doesn't set its own, the package is the same as the parent scope's.
#


#
#   Function: ClearScopeStack
#
#   Resets the scope stack and the scope record for a new file.  Afterwards the stack holds a single top-level scope and the
#   record holds a single entry for line 1.  Not necessary if you call <ParseForCommentsAndTokens()>.
#
sub ClearScopeStack
    {
    my ($self) = @_;

    my $topLevelScope = NaturalDocs::Languages::Advanced::Scope->New(undef, undef);
    my $initialChange = NaturalDocs::Languages::Advanced::ScopeChange->New(undef, 1);

    $self->[SCOPE_STACK] = [ $topLevelScope ];
    $self->[SCOPE_RECORD] = [ $initialChange ];
    };


#
#   Function: StartScope
#
#   Pushes a new scope level onto the stack and notes the change in the scope record.  The new level is created with the
#   current level's using statements.
#
#   Parameters:
#
#       closingSymbol - The closing symbol of the scope.
#       lineNumber - The line number where the scope begins.
#       package - The package <SymbolString> of the scope.  Undef means no change.
#
sub StartScope #(closingSymbol, lineNumber, package)
    {
    my ($self, $closingSymbol, $lineNumber, $package) = @_;

    my $newScope = NaturalDocs::Languages::Advanced::Scope->New($closingSymbol, $package, $self->CurrentUsing());
    push @{$self->[SCOPE_STACK]}, $newScope;

    $self->AddToScopeRecord($self->CurrentScope(), $lineNumber);
    };


#
#   Function: EndScope
#
#   Records the end of the current scope level.  The top level is never removed.  Note that this is blind; you need to manually
#   check <ClosingScopeSymbol()> if you need to determine if it is correct to do so.
#
#   Parameters:
#
#       lineNumber - The line number where the scope ends.
#
sub EndScope #(lineNumber)
    {
    my ($self, $lineNumber) = @_;
    my $scopeStack = $self->[SCOPE_STACK];

    pop @$scopeStack if (scalar @$scopeStack > 1);

    $self->AddToScopeRecord($self->CurrentScope(), $lineNumber);
    };


#
#   Function: ClosingScopeSymbol
#
#   Returns the symbol that ends the current scope level, or undef if we are at the top level.
#
sub ClosingScopeSymbol
    {
    my ($self) = @_;

    my $currentLevel = $self->[SCOPE_STACK]->[-1];
    return $currentLevel->ClosingSymbol();
    };


#
#   Function: CurrentScope
#
#   Returns the current calculated scope, or undef if global.  The default implementation just returns <CurrentPackage()>.  This
#   is a separate function because C++ may need to track namespaces and classes separately, and so the current scope would
#   be a concatenation of them.
#
sub CurrentScope
    {
    my ($self) = @_;
    return $self->CurrentPackage();
    };


#
#   Function: CurrentPackage
#
#   Returns the current calculated package or class, or undef if none.  This is the package of the innermost scope level
#   that has one defined.
#
sub CurrentPackage
    {
    my ($self) = @_;

    my $package;

    # Walk from the innermost level outwards and stop at the first one with a package.
    foreach my $scope (reverse @{$self->[SCOPE_STACK]})
        {
        $package = $scope->Package();
        last if (defined $package);
        };

    return $package;
    };


#
#   Function: SetPackage
#
#   Sets the package for the current scope level and notes the change in the scope record.
#
#   Parameters:
#
#       package - The new package <SymbolString>.
#       lineNumber - The line number the new package starts on.
#
sub SetPackage #(package, lineNumber)
    {
    my ($self, $package, $lineNumber) = @_;

    my $currentLevel = $self->[SCOPE_STACK]->[-1];
    $currentLevel->SetPackage($package);

    $self->AddToScopeRecord($self->CurrentScope(), $lineNumber);
    };


#
#   Function: CurrentUsing
#
#   Returns the current calculated arrayref of <SymbolStrings> from Using statements, or undef if none.
#
sub CurrentUsing
    {
    my ($self) = @_;

    my $currentLevel = $self->[SCOPE_STACK]->[-1];
    return $currentLevel->Using();
    };


#
#   Function: AddUsing
#
#   Adds a Using <SymbolString> to the current scope.
#
sub AddUsing #(using)
    {
    my ($self, $using) = @_;

    my $currentLevel = $self->[SCOPE_STACK]->[-1];
    $currentLevel->AddUsing($using);
    };



###############################################################################
# Group: Support Functions


#
#   Function: AddToScopeRecord
#
#   Adds a change to the scope record, condensing unnecessary entries.
#
#   Parameters:
#
#       newScope - What the scope <SymbolString> changed to.
#       lineNumber - Where the scope changed.
#
sub AddToScopeRecord #(newScope, lineNumber)
    {
    my ($self, $scope, $lineNumber) = @_;
    my $scopeRecord = $self->ScopeRecord();
    my $latestChange = $scopeRecord->[-1];

    # Nothing is recorded when the scope is the same as in the latest entry.
    if ($scope ne $latestChange->Scope())
        {
        if ($latestChange->LineNumber() == $lineNumber)
            {
            # Only one change is kept per line, so the latest entry is overwritten.
            $latestChange->SetScope($scope);
            }
        else
            {
            my $newChange = NaturalDocs::Languages::Advanced::ScopeChange->New($scope, $lineNumber);
            push @$scopeRecord, $newChange;
            };
        };
    };


#
#   Function: CreateString
#
#   Joins a range of tokens into a single string and returns it.  The range is cut off at the end of the tokens.
#
#   Parameters:
#
#       startIndex - The starting index to convert.
#       endIndex - The ending index, which is *not inclusive*.
#
#   Returns:
#
#       The string, or undef if the range doesn't cover any tokens.
#
sub CreateString #(startIndex, endIndex)
    {
    my ($self, $startIndex, $endIndex) = @_;
    my $tokens = $self->Tokens();

    # Never read past the last token, whatever the requested end is.
    my $limit = scalar @$tokens;
    if ($endIndex < $limit)
        {  $limit = $endIndex;  };

    my $string;

    for (my $i = $startIndex; $i < $limit; $i++)
        {  $string .= $tokens->[$i];  };

    return $string;
    };


1;