diff options
Diffstat (limited to 'docs/tool/Modules/NaturalDocs/Languages')
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/ActionScript.pm | 1473 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Ada.pm | 38 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Advanced.pm | 828 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Advanced/Scope.pm | 95 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Advanced/ScopeChange.pm | 70 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Base.pm | 832 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/CSharp.pm | 1484 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/PLSQL.pm | 319 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Pascal.pm | 143 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Perl.pm | 1370 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Prototype.pm | 92 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Prototype/Parameter.pm | 87 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Simple.pm | 503 | ||||
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Tcl.pm | 219 |
14 files changed, 7553 insertions, 0 deletions
diff --git a/docs/tool/Modules/NaturalDocs/Languages/ActionScript.pm b/docs/tool/Modules/NaturalDocs/Languages/ActionScript.pm new file mode 100644 index 00000000..a55abaf2 --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/ActionScript.pm @@ -0,0 +1,1473 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::ActionScript +# +############################################################################### +# +# A subclass to handle the language variations of Flash ActionScript. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::ActionScript; + +use base 'NaturalDocs::Languages::Advanced'; + + +################################################################################ +# Group: Constants and Types + + +# +# Constants: XML Tag Type +# +# XML_OPENING_TAG - The tag is an opening one, such as <tag>. +# XML_CLOSING_TAG - The tag is a closing one, such as </tag>. +# XML_SELF_CONTAINED_TAG - The tag is self contained, such as <tag />. +# +use constant XML_OPENING_TAG => 1; +use constant XML_CLOSING_TAG => 2; +use constant XML_SELF_CONTAINED_TAG => 3; + + +################################################################################ +# Group: Package Variables + +# +# hash: classModifiers +# An existence hash of all the acceptable class modifiers. The keys are in all lowercase. +# +my %classModifiers = ( 'dynamic' => 1, + 'intrinsic' => 1, + 'final' => 1, + 'internal' => 1, + 'public' => 1 ); + +# +# hash: memberModifiers +# An existence hash of all the acceptable class member modifiers. The keys are in all lowercase. +# +my %memberModifiers = ( 'public' => 1, + 'private' => 1, + 'protected' => 1, + 'static' => 1, + 'internal' => 1, + 'override' => 1 ); + + +# +# hash: declarationEnders +# An existence hash of all the tokens that can end a declaration. This is important because statements don't require a semicolon +# to end. The keys are in all lowercase. +# +my %declarationEnders = ( ';' => 1, + '}' => 1, + '{' => 1, + 'public' => 1, + 'private' => 1, + 'protected' => 1, + 'static' => 1, + 'internal' => 1, + 'dynamic' => 1, + 'intrinsic' => 1, + 'final' => 1, + 'override' => 1, + 'class' => 1, + 'interface' => 1, + 'var' => 1, + 'function' => 1, + 'const' => 1, + 'namespace' => 1, + 'import' => 1 ); + + +# +# var: isEscaped +# Whether the current file being parsed uses escapement. +# +my $isEscaped; + + + +################################################################################ +# Group: Interface Functions + + +# +# Function: PackageSeparator +# Returns the package separator symbol. +# +sub PackageSeparator + { return '.'; }; + + +# +# Function: EnumValues +# Returns the <EnumValuesType> that describes how the language handles enums. +# +sub EnumValues + { return ::ENUM_GLOBAL(); }; + + +# +# Function: ParseParameterLine +# Parses a prototype parameter line and returns it as a <NaturalDocs::Languages::Prototype::Parameter> object. +# +sub ParseParameterLine #(line) + { + my ($self, $line) = @_; + + if ($line =~ /^ ?\.\.\.\ (.+)$/) + { + # This puts them in the wrong fields as $1 should be the name and ... should be the type. However, this is necessary + # because the order in the source is reversed from other parameter declarations and it's more important for the output + # to match the source. + return NaturalDocs::Languages::Prototype::Parameter->New($1, undef, '...', undef, undef, undef); + } + else + { return $self->ParsePascalParameterLine($line); }; + }; + + +# +# Function: TypeBeforeParameter +# Returns whether the type appears before the parameter in prototypes. +# +sub TypeBeforeParameter + { return 0; }; + + +# +# Function: PreprocessFile +# +# If the file is escaped, strips out all unescaped code. Will translate any unescaped comments into comments surrounded by +# "\x1C\x1D\x1E\x1F" and "\x1F\x1E\x1D" characters, so chosen because they are the same character lengths as <!-- and --> +# and will not appear in normal code. +# +sub PreprocessFile + { + my ($self, $lines) = @_; + + if (!$isEscaped) + { return; }; + + use constant MODE_UNESCAPED_REGULAR => 1; + use constant MODE_UNESCAPED_PI => 2; + use constant MODE_UNESCAPED_CDATA => 3; + use constant MODE_UNESCAPED_COMMENT => 4; + use constant MODE_ESCAPED_UNKNOWN_CDATA => 5; + use constant MODE_ESCAPED_CDATA => 6; + use constant MODE_ESCAPED_NO_CDATA => 7; + + my $mode = MODE_UNESCAPED_REGULAR; + + for (my $i = 0; $i < scalar @$lines; $i++) + { + my @tokens = split(/(<[ \t]*\/?[ \t]*mx:Script[^>]*>|<\?|\?>|<\!--|-->|<\!\[CDATA\[|\]\]\>)/, $lines->[$i]); + my $newLine; + + foreach my $token (@tokens) + { + if ($mode == MODE_UNESCAPED_REGULAR) + { + if ($token eq '<?') + { $mode = MODE_UNESCAPED_PI; } + elsif ($token eq '<![CDATA[') + { $mode = MODE_UNESCAPED_CDATA; } + elsif ($token eq '<!--') + { + $mode = MODE_UNESCAPED_COMMENT; + $newLine .= "\x1C\x1D\x1E\x1F"; + } + elsif ($token =~ /^<[ \t]*mx:Script/) + { $mode = MODE_ESCAPED_UNKNOWN_CDATA; }; + } + + elsif ($mode == MODE_UNESCAPED_PI) + { + if ($token eq '?>') + { $mode = MODE_UNESCAPED_REGULAR; }; + } + + elsif ($mode == MODE_UNESCAPED_CDATA) + { + if ($token eq ']]>') + { $mode = MODE_UNESCAPED_REGULAR; }; + } + + elsif ($mode == MODE_UNESCAPED_COMMENT) + { + if ($token eq '-->') + { + $mode = MODE_UNESCAPED_REGULAR; + $newLine .= "\x1F\x1E\x1D"; + } + else + { $newLine .= $token; }; + } + + elsif ($mode == MODE_ESCAPED_UNKNOWN_CDATA) + { + if ($token eq '<![CDATA[') + { $mode = MODE_ESCAPED_CDATA; } + elsif ($token =~ /^<[ \t]*\/[ \t]*mx:Script/) + { + $mode = MODE_UNESCAPED_REGULAR; + $newLine .= '; '; + } + elsif ($token !~ /^[ \t]*$/) + { + $mode = MODE_ESCAPED_NO_CDATA; + $newLine .= $token; + }; + } + + elsif ($mode == MODE_ESCAPED_CDATA) + { + if ($token eq ']]>') + { + $mode = MODE_UNESCAPED_REGULAR; + $newLine .= '; '; + } + else + { $newLine .= $token; }; + } + + else #($mode == MODE_ESCAPED_NO_CDATA) + { + if ($token =~ /^<[ \t]*\/[ \t]*mx:Script/) + { + $mode = MODE_UNESCAPED_REGULAR; + $newLine .= '; '; + } + else + { $newLine .= $token; }; + }; + + }; + + $lines->[$i] = $newLine; + }; + }; + + +# +# Function: ParseFile +# +# Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>. +# +# Parameters: +# +# sourceFile - The <FileName> to parse. +# topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file. +# +# Returns: +# +# The array ( autoTopics, scopeRecord ). +# +# autoTopics - An arrayref of automatically generated topics from the file, or undef if none. +# scopeRecord - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChanges>, or undef if none. +# +sub ParseFile #(sourceFile, topicsList) + { + my ($self, $sourceFile, $topicsList) = @_; + + # The \x1# comment symbols are inserted by PreprocessFile() to stand in for XML comments in escaped files. + my @parseParameters = ( [ '//' ], [ '/*', '*/', "\x1C\x1D\x1E\x1F", "\x1F\x1E\x1D" ], [ '///' ], [ '/**', '*/' ] ); + + my $extension = lc(NaturalDocs::File->ExtensionOf($sourceFile)); + $isEscaped = ($extension eq 'mxml'); + + $self->ParseForCommentsAndTokens($sourceFile, @parseParameters); + + my $tokens = $self->Tokens(); + my $index = 0; + my $lineNumber = 1; + + while ($index < scalar @$tokens) + { + if ($self->TryToSkipWhitespace(\$index, \$lineNumber) || + $self->TryToGetImport(\$index, \$lineNumber) || + $self->TryToGetClass(\$index, \$lineNumber) || + $self->TryToGetFunction(\$index, \$lineNumber) || + $self->TryToGetVariable(\$index, \$lineNumber) ) + { + # The functions above will handle everything. + } + + elsif ($tokens->[$index] eq '{') + { + $self->StartScope('}', $lineNumber, undef, undef, undef); + $index++; + } + + elsif ($tokens->[$index] eq '}') + { + if ($self->ClosingScopeSymbol() eq '}') + { $self->EndScope($lineNumber); }; + + $index++; + } + + else + { + $self->SkipToNextStatement(\$index, \$lineNumber); + }; + }; + + + # Don't need to keep these around. + $self->ClearTokens(); + + + my $autoTopics = $self->AutoTopics(); + + my $scopeRecord = $self->ScopeRecord(); + if (defined $scopeRecord && !scalar @$scopeRecord) + { $scopeRecord = undef; }; + + return ( $autoTopics, $scopeRecord ); + }; + + + +################################################################################ +# Group: Statement Parsing Functions +# All functions here assume that the current position is at the beginning of a statement. +# +# Note for developers: I am well aware that the code in these functions do not check if we're past the end of the tokens as +# often as it should. We're making use of the fact that Perl will always return undef in these cases to keep the code simpler. + + +# +# Function: TryToGetIdentifier +# +# Determines whether the position is at an identifier, and if so, skips it and returns the complete identifier as a string. Returns +# undef otherwise. +# +# Parameters: +# +# indexRef - A reference to the current token index. +# lineNumberRef - A reference to the current line number. +# allowStar - If set, allows the last identifier to be a star. +# +sub TryToGetIdentifier #(indexRef, lineNumberRef, allowStar) + { + my ($self, $indexRef, $lineNumberRef, $allowStar) = @_; + my $tokens = $self->Tokens(); + + my $index = $$indexRef; + + use constant MODE_IDENTIFIER_START => 1; + use constant MODE_IN_IDENTIFIER => 2; + use constant MODE_AFTER_STAR => 3; + + my $identifier; + my $mode = MODE_IDENTIFIER_START; + + while ($index < scalar @$tokens) + { + if ($mode == MODE_IDENTIFIER_START) + { + if ($tokens->[$index] =~ /^[a-z\$\_]/i) + { + $identifier .= $tokens->[$index]; + $index++; + + $mode = MODE_IN_IDENTIFIER; + } + elsif ($allowStar && $tokens->[$index] eq '*') + { + $identifier .= '*'; + $index++; + + $mode = MODE_AFTER_STAR; + } + else + { return undef; }; + } + + elsif ($mode == MODE_IN_IDENTIFIER) + { + if ($tokens->[$index] eq '.') + { + $identifier .= '.'; + $index++; + + $mode = MODE_IDENTIFIER_START; + } + elsif ($tokens->[$index] =~ /^[a-z0-9\$\_]/i) + { + $identifier .= $tokens->[$index]; + $index++; + } + else + { last; }; + } + + else #($mode == MODE_AFTER_STAR) + { + if ($tokens->[$index] =~ /^[a-z0-9\$\_\.]/i) + { return undef; } + else + { last; }; + }; + }; + + # We need to check again because we may have run out of tokens after a dot. + if ($mode != MODE_IDENTIFIER_START) + { + $$indexRef = $index; + return $identifier; + } + else + { return undef; }; + }; + + +# +# Function: TryToGetImport +# +# Determines whether the position is at a import statement, and if so, adds it as a Using statement to the current scope, skips +# it, and returns true. +# +sub TryToGetImport #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + if ($tokens->[$index] ne 'import') + { return undef; }; + + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my $identifier = $self->TryToGetIdentifier(\$index, \$lineNumber, 1); + if (!$identifier) + { return undef; }; + + + # Currently we implement importing by stripping the last package level and treating it as a using. So "import p1.p2.p3" makes + # p1.p2 the using path, which is over-tolerant but that's okay. "import p1.p2.*" is treated the same way, but in this case it's + # not over-tolerant. If there's no dot, there's no point to including it. + + if (index($identifier, '.') != -1) + { + $identifier =~ s/\.[^\.]+$//; + $self->AddUsing( NaturalDocs::SymbolString->FromText($identifier) ); + }; + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return 1; + }; + + +# +# Function: TryToGetClass +# +# Determines whether the position is at a class declaration statement, and if so, generates a topic for it, skips it, and +# returns true. +# +# Supported Syntaxes: +# +# - Classes +# - Interfaces +# - Classes and interfaces with _global +# +sub TryToGetClass #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + my @modifiers; + + while ($tokens->[$index] =~ /^[a-z]/i && + exists $classModifiers{lc($tokens->[$index])} ) + { + push @modifiers, lc($tokens->[$index]); + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + + my $type; + + if ($tokens->[$index] eq 'class' || $tokens->[$index] eq 'interface') + { + $type = $tokens->[$index]; + + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + else + { return undef; }; + + my $className = $self->TryToGetIdentifier(\$index, \$lineNumber); + + if (!$className) + { return undef; }; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my @parents; + + if ($tokens->[$index] eq 'extends') + { + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my $parent = $self->TryToGetIdentifier(\$index, \$lineNumber); + if (!$parent) + { return undef; }; + + push @parents, $parent; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + + if ($type eq 'class' && $tokens->[$index] eq 'implements') + { + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + for (;;) + { + my $parent = $self->TryToGetIdentifier(\$index, \$lineNumber); + if (!$parent) + { return undef; }; + + push @parents, $parent; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if ($tokens->[$index] ne ',') + { last; } + else + { + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + }; + }; + + if ($tokens->[$index] ne '{') + { return undef; }; + + $index++; + + + # If we made it this far, we have a valid class declaration. + + my $topicType; + + if ($type eq 'interface') + { $topicType = ::TOPIC_INTERFACE(); } + else + { $topicType = ::TOPIC_CLASS(); }; + + $className =~ s/^_global.//; + + my $autoTopic = NaturalDocs::Parser::ParsedTopic->New($topicType, $className, + undef, $self->CurrentUsing(), + undef, + undef, undef, $$lineNumberRef); + + $self->AddAutoTopic($autoTopic); + NaturalDocs::Parser->OnClass($autoTopic->Package()); + + foreach my $parent (@parents) + { + NaturalDocs::Parser->OnClassParent($autoTopic->Package(), NaturalDocs::SymbolString->FromText($parent), + undef, $self->CurrentUsing(), ::RESOLVE_ABSOLUTE()); + }; + + $self->StartScope('}', $lineNumber, $autoTopic->Package()); + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return 1; + }; + + +# +# Function: TryToGetFunction +# +# Determines if the position is on a function declaration, and if so, generates a topic for it, skips it, and returns true. +# +# Supported Syntaxes: +# +# - Functions +# - Constructors +# - Properties +# - Functions with _global +# - Functions with namespaces +# +sub TryToGetFunction #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + my $startIndex = $index; + my $startLine = $lineNumber; + + my @modifiers; + my $namespace; + + while ($tokens->[$index] =~ /^[a-z]/i) + { + if ($tokens->[$index] eq 'function') + { last; } + + elsif (exists $memberModifiers{lc($tokens->[$index])}) + { + push @modifiers, lc($tokens->[$index]); + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + + elsif (!$namespace) + { + do + { + $namespace .= $tokens->[$index]; + $index++; + } + while ($tokens->[$index] =~ /^[a-z0-9_]/i); + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + + else + { last; }; + }; + + if ($tokens->[$index] ne 'function') + { return undef; }; + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my $type; + + if ($tokens->[$index] eq 'get' || $tokens->[$index] eq 'set') + { + # This can either be a property ("function get Something()") or a function name ("function get()"). + + my $nextIndex = $index; + my $nextLineNumber = $lineNumber; + + $nextIndex++; + $self->TryToSkipWhitespace(\$nextIndex, \$nextLineNumber); + + if ($tokens->[$nextIndex] eq '(') + { + $type = ::TOPIC_FUNCTION(); + # Ignore the movement and let the code ahead pick it up as the name. + } + else + { + $type = ::TOPIC_PROPERTY(); + $index = $nextIndex; + $lineNumber = $nextLineNumber; + }; + } + else + { $type = ::TOPIC_FUNCTION(); }; + + my $name = $self->TryToGetIdentifier(\$index, \$lineNumber); + if (!$name) + { return undef; }; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if ($tokens->[$index] ne '(') + { return undef; }; + + $index++; + $self->GenericSkipUntilAfter(\$index, \$lineNumber, ')'); + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if ($tokens->[$index] eq ':') + { + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + $self->TryToGetIdentifier(\$index, \$lineNumber, 1); + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + + + my $prototype = $self->NormalizePrototype( $self->CreateString($startIndex, $index) ); + + if ($tokens->[$index] eq '{') + { $self->GenericSkip(\$index, \$lineNumber); } + elsif (!exists $declarationEnders{$tokens->[$index]}) + { return undef; }; + + + my $scope = $self->CurrentScope(); + + if ($name =~ s/^_global.//) + { $scope = undef; }; + if ($namespace) + { $scope = NaturalDocs::SymbolString->Join($scope, $namespace); }; + + $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New($type, $name, + $scope, $self->CurrentUsing(), + $prototype, + undef, undef, $startLine)); + + + # We succeeded if we got this far. + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return 1; + }; + + +# +# Function: TryToGetVariable +# +# Determines if the position is on a variable declaration statement, and if so, generates a topic for each variable, skips the +# statement, and returns true. +# +# Supported Syntaxes: +# +# - Variables +# - Variables with _global +# - Variables with type * (untyped) +# - Constants +# - Variables and constants with namespaces +# +sub TryToGetVariable #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + my $startIndex = $index; + my $startLine = $lineNumber; + + my @modifiers; + my $namespace; + + while ($tokens->[$index] =~ /^[a-z]/i) + { + if ($tokens->[$index] eq 'var' || $tokens->[$index] eq 'const') + { last; } + + elsif (exists $memberModifiers{lc($tokens->[$index])}) + { + push @modifiers, lc($tokens->[$index]); + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + + elsif (!$namespace) + { + do + { + $namespace .= $tokens->[$index]; + $index++; + } + while ($tokens->[$index] =~ /^[a-z0-9_]/i); + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + + else + { last; }; + }; + + my $type; + + if ($tokens->[$index] eq 'var') + { $type = ::TOPIC_VARIABLE(); } + elsif ($tokens->[$index] eq 'const') + { $type = ::TOPIC_CONSTANT(); } + else + { return undef; }; + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my $endTypeIndex = $index; + my @names; + my @types; + + for (;;) + { + my $name = $self->TryToGetIdentifier(\$index, \$lineNumber); + if (!$name) + { return undef; }; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my $type; + + if ($tokens->[$index] eq ':') + { + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + $type = ': ' . $self->TryToGetIdentifier(\$index, \$lineNumber, 1); + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + + if ($tokens->[$index] eq '=') + { + do + { + $self->GenericSkip(\$index, \$lineNumber); + } + while ($tokens->[$index] ne ',' && !exists $declarationEnders{$tokens->[$index]} && $index < scalar @$tokens); + }; + + push @names, $name; + push @types, $type; + + if ($tokens->[$index] eq ',') + { + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + elsif (exists $declarationEnders{$tokens->[$index]}) + { last; } + else + { return undef; }; + }; + + + # We succeeded if we got this far. + + my $prototypePrefix = $self->CreateString($startIndex, $endTypeIndex); + + for (my $i = 0; $i < scalar @names; $i++) + { + my $prototype = $self->NormalizePrototype( $prototypePrefix . ' ' . $names[$i] . $types[$i]); + my $scope = $self->CurrentScope(); + + if ($names[$i] =~ s/^_global.//) + { $scope = undef; }; + if ($namespace) + { $scope = NaturalDocs::SymbolString->Join($scope, $namespace); }; + + $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New($type, $names[$i], + $scope, $self->CurrentUsing(), + $prototype, + undef, undef, $startLine)); + }; + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return 1; + }; + + + +################################################################################ +# Group: Low Level Parsing Functions + + +# +# Function: GenericSkip +# +# Advances the position one place through general code. +# +# - If the position is on a string, it will skip it completely. +# - If the position is on an opening symbol, it will skip until the past the closing symbol. +# - If the position is on whitespace (including comments), it will skip it completely. +# - Otherwise it skips one token. +# +# Parameters: +# +# indexRef - A reference to the current index. +# lineNumberRef - A reference to the current line number. +# +sub GenericSkip #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway. + if ($tokens->[$$indexRef] eq '{') + { + $$indexRef++; + $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}'); + } + elsif ($tokens->[$$indexRef] eq '(') + { + $$indexRef++; + $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ')'); + } + elsif ($tokens->[$$indexRef] eq '[') + { + $$indexRef++; + $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ']'); + } + + elsif ($self->TryToSkipWhitespace($indexRef, $lineNumberRef) || + $self->TryToSkipString($indexRef, $lineNumberRef) || + $self->TryToSkipRegExp($indexRef, $lineNumberRef) || + $self->TryToSkipXML($indexRef, $lineNumberRef) ) + { + } + + else + { $$indexRef++; }; + }; + + +# +# Function: GenericSkipUntilAfter +# +# Advances the position via <GenericSkip()> until a specific token is reached and passed. +# +sub GenericSkipUntilAfter #(indexRef, lineNumberRef, token) + { + my ($self, $indexRef, $lineNumberRef, $token) = @_; + my $tokens = $self->Tokens(); + + while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token) + { $self->GenericSkip($indexRef, $lineNumberRef); }; + + if ($tokens->[$$indexRef] eq "\n") + { $$lineNumberRef++; }; + $$indexRef++; + }; + + +# +# Function: IndiscriminateSkipUntilAfterSequence +# +# Advances the position indiscriminately until a specific token sequence is reached and passed. +# +sub IndiscriminateSkipUntilAfterSequence #(indexRef, lineNumberRef, token, token, ...) + { + my ($self, $indexRef, $lineNumberRef, @sequence) = @_; + my $tokens = $self->Tokens(); + + while ($$indexRef < scalar @$tokens && !$self->IsAtSequence($$indexRef, @sequence)) + { + if ($tokens->[$$indexRef] eq "\n") + { $$lineNumberRef++; }; + $$indexRef++; + }; + + if ($self->IsAtSequence($$indexRef, @sequence)) + { + $$indexRef += scalar @sequence; + foreach my $token (@sequence) + { + if ($token eq "\n") + { $$lineNumberRef++; }; + }; + }; + }; + + +# +# Function: SkipToNextStatement +# +# Advances the position via <GenericSkip()> until the next statement, which is defined as anything in <declarationEnders> not +# appearing in brackets or strings. It will always advance at least one token. +# +sub SkipToNextStatement #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + if ($tokens->[$$indexRef] eq ';') + { $$indexRef++; } + + else + { + do + { + $self->GenericSkip($indexRef, $lineNumberRef); + } + while ( $$indexRef < scalar @$tokens && + !exists $declarationEnders{$tokens->[$$indexRef]} ); + }; + }; + + +# +# Function: TryToSkipRegExp +# If the current position is on a regular expression, skip past it and return true. +# +sub TryToSkipRegExp #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + if ($tokens->[$$indexRef] eq '/') + { + # A slash can either start a regular expression or be a divide symbol. Skip backwards to see what the previous symbol is. + my $index = $$indexRef - 1; + + while ($index >= 0 && $tokens->[$index] =~ /^(?: |\t|\n)/) + { $index--; }; + + if ($index < 0 || $tokens->[$index] !~ /^\=\(\[\,]/) + { return 0; }; + + $$indexRef++; + + while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne '/') + { + if ($tokens->[$$indexRef] eq '\\') + { $$indexRef += 2; } + elsif ($tokens->[$$indexRef] eq "\n") + { + $$indexRef++; + $$lineNumberRef++; + } + else + { $$indexRef++; } + }; + + if ($$indexRef < scalar @$tokens) + { + $$indexRef++; + + if ($tokens->[$$indexRef] =~ /^[gimsx]+$/i) + { $$indexRef++; }; + }; + + return 1; + } + else + { return 0; }; + }; + + +# +# Function: TryToSkipXML +# If the current position is on an XML literal, skip past it and return true. +# +sub TryToSkipXML #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + if ($tokens->[$$indexRef] eq '<') + { + # A < can either start an XML literal or be a comparison or shift operator. First check the next character for << or <=. + + my $index = $$indexRef + 1; + + while ($index < scalar @$tokens && $tokens->[$index] =~ /^[\=\<]$/) + { return 0; }; + + + # Next try the previous character. + + $index = $$indexRef - 1; + + while ($index >= 0 && $tokens->[$index] =~ /^[ |\t|\n]/) + { $index--; }; + + if ($index < 0 || $tokens->[$index] !~ /^[\=\(\[\,\>]/) + { return 0; }; + } + else + { return 0; }; + + + # Only handle the tag here if it's not an irregular XML section. + if (!$self->TryToSkipIrregularXML($indexRef, $lineNumberRef)) + { + my @tagStack; + + my ($tagType, $tagIdentifier) = $self->GetAndSkipXMLTag($indexRef, $lineNumberRef); + if ($tagType == XML_OPENING_TAG) + { push @tagStack, $tagIdentifier; }; + + while (scalar @tagStack && $$indexRef < scalar @$tokens) + { + $self->SkipToNextXMLTag($indexRef, $lineNumberRef); + ($tagType, $tagIdentifier) = $self->GetAndSkipXMLTag($indexRef, $lineNumberRef); + + if ($tagType == XML_OPENING_TAG) + { push @tagStack, $tagIdentifier; } + elsif ($tagType == XML_CLOSING_TAG && $tagIdentifier eq $tagStack[-1]) + { pop @tagStack; }; + }; + }; + + + return 1; + }; + + +# +# Function: TryToSkipIrregularXML +# +# If the current position is on an irregular XML tag, skip past it and return true. Irregular XML tags are defined as +# +# CDATA - <![CDATA[ ... ]]> +# Comments - <!-- ... --> +# PI - <? ... ?> +# +sub TryToSkipIrregularXML #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + + if ($self->IsAtSequence($$indexRef, '<', '!', '[', 'CDATA', '[')) + { + $$indexRef += 5; + $self->IndiscriminateSkipUntilAfterSequence($indexRef, $lineNumberRef, ']', ']', '>'); + return 1; + } + + elsif ($self->IsAtSequence($$indexRef, '<', '!', '-', '-')) + { + $$indexRef += 4; + $self->IndiscriminateSkipUntilAfterSequence($indexRef, $lineNumberRef, '-', '-', '>'); + return 1; + } + + elsif ($self->IsAtSequence($$indexRef, '<', '?')) + { + $$indexRef += 2; + $self->IndiscriminateSkipUntilAfterSequence($indexRef, $lineNumberRef, '?', '>'); + return 1; + } + + else + { return 0; }; + }; + + +# +# Function: GetAndSkipXMLTag +# +# Processes the XML tag at the current position, moves beyond it, and returns information about it. Assumes the position is on +# the opening angle bracket of the tag and the tag is a normal XML tag, not one of the ones handled by +# <TryToSkipIrregularXML()>. +# +# Parameters: +# +# indexRef - A reference to the index of the position of the opening angle bracket. +# lineNumberRef - A reference to the line number of the position of the opening angle bracket. +# +# Returns: +# +# The array ( tagType, name ). +# +# tagType - One of the <XML Tag Type> constants. +# identifier - The identifier of the tag. If it's an empty tag (<> or </>), this will be "(anonymous)". +# +sub GetAndSkipXMLTag #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + if ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne '<') + { die "Tried to call GetXMLTag when the position isn't on an opening bracket."; }; + + # Get the anonymous ones out of the way so we don't have to worry about them below, since they're rather exceptional. + + if ($self->IsAtSequence($$indexRef, '<', '>')) + { + $$indexRef += 2; + return ( XML_OPENING_TAG, '(anonymous)' ); + } + elsif ($self->IsAtSequence($$indexRef, '<', '/', '>')) + { + $$indexRef += 3; + return ( XML_CLOSING_TAG, '(anonymous)' ); + }; + + + # Grab the identifier. + + my $tagType = XML_OPENING_TAG; + my $identifier; + + $$indexRef++; + + if ($tokens->[$$indexRef] eq '/') + { + $$indexRef++; + $tagType = XML_CLOSING_TAG; + }; + + $self->TryToSkipXMLWhitespace($indexRef, $lineNumberRef); + + + # The identifier could be a native expression in braces. + + if ($tokens->[$$indexRef] eq '{') + { + my $startOfIdentifier = $$indexRef; + + $$indexRef++; + $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}'); + + $identifier = $self->CreateString($startOfIdentifier, $$indexRef); + } + + + # Otherwise just grab content until whitespace or the end of the tag. + + else + { + while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] !~ /^[\/\>\ \t]$/) + { + $identifier .= $tokens->[$$indexRef]; + $$indexRef++; + }; + }; + + + # Skip to the end of the tag. + + while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] !~ /^[\/\>]$/) + { + if ($tokens->[$$indexRef] eq '{') + { + $$indexRef++; + $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}'); + } + + elsif ($self->TryToSkipXMLWhitespace($indexRef, $lineNumberRef)) + { } + + # We don't need to do special handling for attribute quotes or anything like that because there's no backslashing in + # XML. It's all handled with entity characters. + else + { $$indexRef++; }; + }; + + + if ($tokens->[$$indexRef] eq '/') + { + if ($tagType == XML_OPENING_TAG) + { $tagType = XML_SELF_CONTAINED_TAG; }; + + $$indexRef++; + }; + + if ($tokens->[$$indexRef] eq '>') + { $$indexRef++; }; + + if (!$identifier) + { $identifier = '(anonymous)'; }; + + + return ( $tagType, $identifier ); + }; + + +# +# Function: SkipToNextXMLTag +# Skips to the next normal XML tag. It will not stop at elements handled by <TryToSkipIrregularXML()>. Note that if the +# position is already at an XML tag, it will not move. +# +sub SkipToNextXMLTag #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + while ($$indexRef < scalar @$tokens) + { + if ($tokens->[$$indexRef] eq '{') + { + $$indexRef++; + $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}'); + } + + elsif ($self->TryToSkipIrregularXML($indexRef, $lineNumberRef)) + { } + + elsif ($tokens->[$$indexRef] eq '<') + { last; } + + else + { + if ($tokens->[$$indexRef] eq "\n") + { $$lineNumberRef++; }; + + $$indexRef++; + }; + }; + }; + + +# +# Function: TryToSkipXMLWhitespace +# If the current position is on XML whitespace, skip past it and return true. +# +sub TryToSkipXMLWhitespace #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $result; + + while ($$indexRef < scalar @$tokens) + { + if ($tokens->[$$indexRef] =~ /^[ \t]/) + { + $$indexRef++; + $result = 1; + } + elsif ($tokens->[$$indexRef] eq "\n") + { + $$indexRef++; + $$lineNumberRef++; + $result = 1; + } + else + { last; }; + }; + + return $result; + }; + + +# +# Function: TryToSkipString +# If the current position is on a string delimiter, skip past the string and return true. +# +# Parameters: +# +# indexRef - A reference to the index of the position to start at. +# lineNumberRef - A reference to the line number of the position. +# +# Returns: +# +# Whether the position was at a string. +# +# Syntax Support: +# +# - Supports quotes and apostrophes. +# +sub TryToSkipString #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + + return ($self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '\'') || + $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '"') ); + }; + + +# +# Function: TryToSkipWhitespace +# If the current position is on a whitespace token, a line break token, or a comment, it skips them and returns true. If there are +# a number of these in a row, it skips them all. +# +sub TryToSkipWhitespace #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $result; + + while ($$indexRef < scalar @$tokens) + { + if ($tokens->[$$indexRef] =~ /^[ \t]/) + { + $$indexRef++; + $result = 1; + } + elsif ($tokens->[$$indexRef] eq "\n") + { + $$indexRef++; + $$lineNumberRef++; + $result = 1; + } + elsif ($self->TryToSkipComment($indexRef, $lineNumberRef)) + { + $result = 1; + } + else + { last; }; + }; + + return $result; + }; + + +# +# Function: TryToSkipComment +# If the current position is on a comment, skip past it and return true. +# +sub TryToSkipComment #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + + return ( $self->TryToSkipLineComment($indexRef, $lineNumberRef) || + $self->TryToSkipMultilineComment($indexRef, $lineNumberRef) ); + }; + + +# +# Function: TryToSkipLineComment +# If the current position is on a line comment symbol, skip past it and return true. +# +sub TryToSkipLineComment #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + if ($tokens->[$$indexRef] eq '/' && $tokens->[$$indexRef+1] eq '/') + { + $self->SkipRestOfLine($indexRef, $lineNumberRef); + return 1; + } + else + { return undef; }; + }; + + +# +# Function: TryToSkipMultilineComment +# If the current position is on an opening comment symbol, skip past it and return true. +# +sub TryToSkipMultilineComment #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + if ($tokens->[$$indexRef] eq '/' && $tokens->[$$indexRef+1] eq '*') + { + $self->SkipUntilAfter($indexRef, $lineNumberRef, '*', '/'); + return 1; + } + else + { return undef; }; + }; + + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/Ada.pm b/docs/tool/Modules/NaturalDocs/Languages/Ada.pm new file mode 100644 index 00000000..d7369ac6 --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Ada.pm @@ -0,0 +1,38 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Ada +# +############################################################################### +# +# A subclass to handle the language variations of Ada +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::Ada; + +use base 'NaturalDocs::Languages::Simple'; + + +# +# Function: ParseParameterLine +# Overridden because Ada uses Pascal-style parameters +# +sub ParseParameterLine #(...) + { + my ($self, @params) = @_; + return $self->SUPER::ParsePascalParameterLine(@params); + }; + +sub TypeBeforeParameter + { + return 0; + }; + + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/Advanced.pm b/docs/tool/Modules/NaturalDocs/Languages/Advanced.pm new file mode 100644 index 00000000..8ae27bfc --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Advanced.pm @@ -0,0 +1,828 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Advanced +# +############################################################################### +# +# The base class for all languages that have full support in Natural Docs. Each one will have a custom parser capable +# of documenting undocumented aspects of the code. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +use NaturalDocs::Languages::Advanced::Scope; +use NaturalDocs::Languages::Advanced::ScopeChange; + +package NaturalDocs::Languages::Advanced; + +use base 'NaturalDocs::Languages::Base'; + + +############################################################################# +# Group: Implementation + +# +# Constants: Members +# +# The class is implemented as a blessed arrayref. The following constants are used as indexes. +# +# TOKENS - An arrayref of tokens used in all the <Parsing Functions>. +# SCOPE_STACK - An arrayref of <NaturalDocs::Languages::Advanced::Scope> objects serving as a scope stack for parsing. +# There will always be one available, with a symbol of undef, for the top level. +# SCOPE_RECORD - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChange> objects, as generated by the scope +# stack. If there is more than one change per line, only the last is stored. +# AUTO_TOPICS - An arrayref of <NaturalDocs::Parser::ParsedTopics> generated automatically from the code. +# +use NaturalDocs::DefineMembers 'TOKENS', 'SCOPE_STACK', 'SCOPE_RECORD', 'AUTO_TOPICS'; + + +############################################################################# +# Group: Functions + +# +# Function: New +# +# Creates and returns a new object. +# +# Parameters: +# +# name - The name of the language. +# +sub New #(name) + { + my ($package, @parameters) = @_; + + my $object = $package->SUPER::New(@parameters); + $object->[TOKENS] = undef; + $object->[SCOPE_STACK] = undef; + $object->[SCOPE_RECORD] = undef; + + return $object; + }; + + +# Function: Tokens +# Returns the tokens found by <ParseForCommentsAndTokens()>. +sub Tokens + { return $_[0]->[TOKENS]; }; + +# Function: SetTokens +# Replaces the tokens. +sub SetTokens #(tokens) + { $_[0]->[TOKENS] = $_[1]; }; + +# Function: ClearTokens +# Resets the token list. You may want to do this after parsing is over to save memory. +sub ClearTokens + { $_[0]->[TOKENS] = undef; }; + +# Function: AutoTopics +# Returns the arrayref of automatically generated topics, or undef if none. +sub AutoTopics + { return $_[0]->[AUTO_TOPICS]; }; + +# Function: AddAutoTopic +# Adds a <NaturalDocs::Parser::ParsedTopic> to <AutoTopics()>. +sub AddAutoTopic #(topic) + { + my ($self, $topic) = @_; + if (!defined $self->[AUTO_TOPICS]) + { $self->[AUTO_TOPICS] = [ ]; }; + push @{$self->[AUTO_TOPICS]}, $topic; + }; + +# Function: ClearAutoTopics +# Resets the automatic topic list. Not necessary if you call <ParseForCommentsAndTokens()>. +sub ClearAutoTopics + { $_[0]->[AUTO_TOPICS] = undef; }; + +# Function: ScopeRecord +# Returns an arrayref of <NaturalDocs::Languages::Advanced::ScopeChange> objects describing how and when the scope +# changed thoughout the file. There will always be at least one entry, which will be for line 1 and undef as the scope. +sub ScopeRecord + { return $_[0]->[SCOPE_RECORD]; }; + + + +############################################################################### +# +# Group: Parsing Functions +# +# These functions are good general language building blocks. Use them to create your language-specific parser. +# +# All functions work on <Tokens()> and assume it is set by <ParseForCommentsAndTokens()>. +# + + +# +# Function: ParseForCommentsAndTokens +# +# Loads the passed file, sends all appropriate comments to <NaturalDocs::Parser->OnComment()>, and breaks the rest into +# an arrayref of tokens. Tokens are defined as +# +# - All consecutive alphanumeric and underscore characters. +# - All consecutive whitespace. +# - A single line break. It will always be "\n"; you don't have to worry about platform differences. +# - A single character not included above, which is usually a symbol. Multiple consecutive ones each get their own token. +# +# The result will be placed in <Tokens()>. +# +# Parameters: +# +# sourceFile - The source <FileName> to load and parse. +# lineCommentSymbols - An arrayref of symbols that designate line comments, or undef if none. +# blockCommentSymbols - An arrayref of symbol pairs that designate multiline comments, or undef if none. Symbol pairs are +# designated as two consecutive array entries, the opening symbol appearing first. +# javadocLineCommentSymbols - An arrayref of symbols that designate the start of a JavaDoc comment, or undef if none. +# javadocBlockCommentSymbols - An arrayref of symbol pairs that designate multiline JavaDoc comments, or undef if none. +# +# Notes: +# +# - This function automatically calls <ClearAutoTopics()> and <ClearScopeStack()>. You only need to call those functions +# manually if you override this one. +# - To save parsing time, all comment lines sent to <NaturalDocs::Parser->OnComment()> will be replaced with blank lines +# in <Tokens()>. It's all the same to most languages. +# +sub ParseForCommentsAndTokens #(FileName sourceFile, string[] lineCommentSymbols, string[] blockCommentSymbols, string[] javadocLineCommentSymbols, string[] javadocBlockCommentSymbols) + { + my ($self, $sourceFile, $lineCommentSymbols, $blockCommentSymbols, + $javadocLineCommentSymbols, $javadocBlockCommentSymbols) = @_; + + open(SOURCEFILEHANDLE, '<' . $sourceFile) + or die "Couldn't open input file " . $sourceFile . "\n"; + + my $tokens = [ ]; + $self->SetTokens($tokens); + + # For convenience. + $self->ClearAutoTopics(); + $self->ClearScopeStack(); + + + # Load and preprocess the file + + my @lines; + my $line = <SOURCEFILEHANDLE>; + + # On the very first line, remove a Unicode BOM if present. Information on it available at: + # http://www.unicode.org/faq/utf_bom.html#BOM + $line =~ s/^\xEF\xBB\xBF//; + + while (defined $line) + { + ::XChomp(\$line); + push @lines, $line; + + $line = <SOURCEFILEHANDLE>; + }; + + close(SOURCEFILEHANDLE); + + $self->PreprocessFile(\@lines); + + + # Go through the file + + my $lineIndex = 0; + + while ($lineIndex < scalar @lines) + { + $line = $lines[$lineIndex]; + + my @commentLines; + my $commentLineNumber; + my $isJavaDoc; + my $closingSymbol; + + + # Retrieve single line comments. This leaves $lineIndex at the next line. + + if ( ($isJavaDoc = $self->StripOpeningJavaDocSymbols(\$line, $javadocLineCommentSymbols)) || + $self->StripOpeningSymbols(\$line, $lineCommentSymbols)) + { + $commentLineNumber = $lineIndex + 1; + + do + { + push @commentLines, $line; + push @$tokens, "\n"; + + $lineIndex++; + + if ($lineIndex >= scalar @lines) + { goto EndDo; }; + + $line = $lines[$lineIndex]; + } + while ($self->StripOpeningSymbols(\$line, $lineCommentSymbols)); + + EndDo: # I hate Perl sometimes. + } + + + # Retrieve multiline comments. This leaves $lineIndex at the next line. + + elsif ( ($isJavaDoc = $self->StripOpeningJavaDocBlockSymbols(\$line, $javadocBlockCommentSymbols)) || + ($closingSymbol = $self->StripOpeningBlockSymbols(\$line, $blockCommentSymbols)) ) + { + $commentLineNumber = $lineIndex + 1; + + if ($isJavaDoc) + { $closingSymbol = $isJavaDoc; }; + + # Note that it is possible for a multiline comment to start correctly but not end so. We want those comments to stay in + # the code. For example, look at this prototype with this splint annotation: + # + # int get_array(integer_t id, + # /*@out@*/ array_t array); + # + # The annotation starts correctly but doesn't end so because it is followed by code on the same line. + + my ($lineRemainder, $isMultiLine); + + for (;;) + { + $lineRemainder = $self->StripClosingSymbol(\$line, $closingSymbol); + + push @commentLines, $line; + + # If we found an end comment symbol... + if (defined $lineRemainder) + { last; }; + + push @$tokens, "\n"; + $lineIndex++; + $isMultiLine = 1; + + if ($lineIndex >= scalar @lines) + { last; }; + + $line = $lines[$lineIndex]; + }; + + if ($lineRemainder !~ /^[ \t]*$/) + { + # If there was something past the closing symbol this wasn't an acceptable comment. + + if ($isMultiLine) + { $self->TokenizeLine($lineRemainder); } + else + { + # We go back to the original line if it wasn't a multiline comment because we want the comment to stay in the + # code. Otherwise the /*@out@*/ from the example would be removed. + $self->TokenizeLine($lines[$lineIndex]); + }; + + @commentLines = ( ); + } + else + { + push @$tokens, "\n"; + }; + + $lineIndex++; + } + + + # Otherwise just add it to the code. + + else + { + $self->TokenizeLine($line); + $lineIndex++; + }; + + + # If there were comments, send them to Parser->OnComment(). + + if (scalar @commentLines) + { + NaturalDocs::Parser->OnComment(\@commentLines, $commentLineNumber, $isJavaDoc); + @commentLines = ( ); + $isJavaDoc = undef; + }; + + # $lineIndex was incremented by the individual code paths above. + + }; # while ($lineIndex < scalar @lines) + }; + + +# +# Function: PreprocessFile +# +# An overridable function if you'd like to preprocess the file before it goes into <ParseForCommentsAndTokens()>. +# +# Parameters: +# +# lines - An arrayref to the file's lines. Each line has its line break stripped off, but is otherwise untouched. +# +sub PreprocessFile #(lines) + { + }; + + +# +# Function: TokenizeLine +# +# Converts the passed line to tokens as described in <ParseForCommentsAndTokens> and adds them to <Tokens()>. Also +# adds a line break token after it. +# +sub TokenizeLine #(line) + { + my ($self, $line) = @_; + push @{$self->Tokens()}, $line =~ /(\w+|[ \t]+|.)/g, "\n"; + }; + + +# +# Function: TryToSkipString +# +# If the position is on a string delimiter, moves the position to the token following the closing delimiter, or past the end of the +# tokens if there is none. Assumes all other characters are allowed in the string, the delimiter itself is allowed if it's preceded by +# a backslash, and line breaks are allowed in the string. +# +# Parameters: +# +# indexRef - A reference to the position's index into <Tokens()>. +# lineNumberRef - A reference to the position's line number. +# openingDelimiter - The opening string delimiter, such as a quote or an apostrophe. +# closingDelimiter - The closing string delimiter, if different. If not defined, assumes the same as openingDelimiter. +# startContentIndexRef - A reference to a variable in which to store the index of the first token of the string's content. +# May be undef. +# endContentIndexRef - A reference to a variable in which to store the index of the end of the string's content, which is one +# past the last index of content. May be undef. +# +# Returns: +# +# Whether the position was on the passed delimiter or not. The index, line number, and content index ref variables will be +# updated only if true. +# +sub TryToSkipString #(indexRef, lineNumberRef, openingDelimiter, closingDelimiter, startContentIndexRef, endContentIndexRef) + { + my ($self, $index, $lineNumber, $openingDelimiter, $closingDelimiter, $startContentIndexRef, $endContentIndexRef) = @_; + my $tokens = $self->Tokens(); + + if (!defined $closingDelimiter) + { $closingDelimiter = $openingDelimiter; }; + + if ($tokens->[$$index] ne $openingDelimiter) + { return undef; }; + + + $$index++; + if (defined $startContentIndexRef) + { $$startContentIndexRef = $$index; }; + + while ($$index < scalar @$tokens) + { + if ($tokens->[$$index] eq "\\") + { + # Skip the token after it. + $$index += 2; + } + elsif ($tokens->[$$index] eq "\n") + { + $$lineNumber++; + $$index++; + } + elsif ($tokens->[$$index] eq $closingDelimiter) + { + if (defined $endContentIndexRef) + { $$endContentIndexRef = $$index; }; + + $$index++; + last; + } + else + { + $$index++; + }; + }; + + if ($$index >= scalar @$tokens && defined $endContentIndexRef) + { $$endContentIndexRef = scalar @$tokens; }; + + return 1; + }; + + +# +# Function: SkipRestOfLine +# +# Moves the position to the token following the next line break, or past the end of the tokens array if there is none. Useful for +# line comments. +# +# Note that it skips blindly. It assumes there cannot be anything of interest, such as a string delimiter, between the position +# and the end of the line. +# +# Parameters: +# +# indexRef - A reference to the position's index into <Tokens()>. +# lineNumberRef - A reference to the position's line number. + +sub SkipRestOfLine #(indexRef, lineNumberRef) + { + my ($self, $index, $lineNumber) = @_; + my $tokens = $self->Tokens(); + + while ($$index < scalar @$tokens) + { + if ($tokens->[$$index] eq "\n") + { + $$lineNumber++; + $$index++; + last; + } + else + { + $$index++; + }; + }; + }; + + +# +# Function: SkipUntilAfter +# +# Moves the position to the token following the next occurance of a particular token sequence, or past the end of the tokens +# array if it never occurs. Useful for multiline comments. +# +# Note that it skips blindly. It assumes there cannot be anything of interest, such as a string delimiter, between the position +# and the end of the line. +# +# Parameters: +# +# indexRef - A reference to the position's index. +# lineNumberRef - A reference to the position's line number. +# token - A token that must be matched. Can be specified multiple times to match a sequence of tokens. +# +sub SkipUntilAfter #(indexRef, lineNumberRef, token, token, ...) + { + my ($self, $index, $lineNumber, @target) = @_; + my $tokens = $self->Tokens(); + + while ($$index < scalar @$tokens) + { + if ($tokens->[$$index] eq $target[0] && ($$index + scalar @target) <= scalar @$tokens) + { + my $match = 1; + + for (my $i = 1; $i < scalar @target; $i++) + { + if ($tokens->[$$index+$i] ne $target[$i]) + { + $match = 0; + last; + }; + }; + + if ($match) + { + $$index += scalar @target; + return; + }; + }; + + if ($tokens->[$$index] eq "\n") + { + $$lineNumber++; + $$index++; + } + else + { + $$index++; + }; + }; + }; + + +# +# Function: IsFirstLineToken +# +# Returns whether the position is at the first token of a line, not including whitespace. +# +# Parameters: +# +# index - The index of the position. +# +sub IsFirstLineToken #(index) + { + my ($self, $index) = @_; + my $tokens = $self->Tokens(); + + if ($index == 0) + { return 1; }; + + $index--; + + if ($tokens->[$index] =~ /^[ \t]/) + { $index--; }; + + if ($index <= 0 || $tokens->[$index] eq "\n") + { return 1; } + else + { return undef; }; + }; + + +# +# Function: IsLastLineToken +# +# Returns whether the position is at the last token of a line, not including whitespace. +# +# Parameters: +# +# index - The index of the position. +# +sub IsLastLineToken #(index) + { + my ($self, $index) = @_; + my $tokens = $self->Tokens(); + + do + { $index++; } + while ($index < scalar @$tokens && $tokens->[$index] =~ /^[ \t]/); + + if ($index >= scalar @$tokens || $tokens->[$index] eq "\n") + { return 1; } + else + { return undef; }; + }; + + +# +# Function: IsAtSequence +# +# Returns whether the position is at a sequence of tokens. +# +# Parameters: +# +# index - The index of the position. +# token - A token to match. Specify multiple times to specify the sequence. +# +sub IsAtSequence #(index, token, token, token ...) + { + my ($self, $index, @target) = @_; + my $tokens = $self->Tokens(); + + if ($index + scalar @target > scalar @$tokens) + { return undef; }; + + for (my $i = 0; $i < scalar @target; $i++) + { + if ($tokens->[$index + $i] ne $target[$i]) + { return undef; }; + }; + + return 1; + }; + + +# +# Function: IsBackslashed +# +# Returns whether the position is after a backslash. +# +# Parameters: +# +# index - The index of the postition. +# +sub IsBackslashed #(index) + { + my ($self, $index) = @_; + my $tokens = $self->Tokens(); + + if ($index > 0 && $tokens->[$index - 1] eq "\\") + { return 1; } + else + { return undef; }; + }; + + + +############################################################################### +# +# Group: Scope Functions +# +# These functions provide a nice scope stack implementation for language-specific parsers to use. The default implementation +# makes the following assumptions. +# +# - Packages completely replace one another, rather than concatenating. You need to concatenate manually if that's the +# behavior. +# +# - Packages inherit, so if a scope level doesn't set its own, the package is the same as the parent scope's. +# + + +# +# Function: ClearScopeStack +# +# Clears the scope stack for a new file. Not necessary if you call <ParseForCommentsAndTokens()>. +# +sub ClearScopeStack + { + my ($self) = @_; + $self->[SCOPE_STACK] = [ NaturalDocs::Languages::Advanced::Scope->New(undef, undef) ]; + $self->[SCOPE_RECORD] = [ NaturalDocs::Languages::Advanced::ScopeChange->New(undef, 1) ]; + }; + + +# +# Function: StartScope +# +# Records a new scope level. +# +# Parameters: +# +# closingSymbol - The closing symbol of the scope. +# lineNumber - The line number where the scope begins. +# package - The package <SymbolString> of the scope. Undef means no change. +# +sub StartScope #(closingSymbol, lineNumber, package) + { + my ($self, $closingSymbol, $lineNumber, $package) = @_; + + push @{$self->[SCOPE_STACK]}, + NaturalDocs::Languages::Advanced::Scope->New($closingSymbol, $package, $self->CurrentUsing()); + + $self->AddToScopeRecord($self->CurrentScope(), $lineNumber); + }; + + +# +# Function: EndScope +# +# Records the end of the current scope level. Note that this is blind; you need to manually check <ClosingScopeSymbol()> if +# you need to determine if it is correct to do so. +# +# Parameters: +# +# lineNumber - The line number where the scope ends. +# +sub EndScope #(lineNumber) + { + my ($self, $lineNumber) = @_; + + if (scalar @{$self->[SCOPE_STACK]} > 1) + { pop @{$self->[SCOPE_STACK]}; }; + + $self->AddToScopeRecord($self->CurrentScope(), $lineNumber); + }; + + +# +# Function: ClosingScopeSymbol +# +# Returns the symbol that ends the current scope level, or undef if we are at the top level. +# +sub ClosingScopeSymbol + { + my ($self) = @_; + return $self->[SCOPE_STACK]->[-1]->ClosingSymbol(); + }; + + +# +# Function: CurrentScope +# +# Returns the current calculated scope, or undef if global. The default implementation just returns <CurrentPackage()>. This +# is a separate function because C++ may need to track namespaces and classes separately, and so the current scope would +# be a concatenation of them. +# +sub CurrentScope + { + return $_[0]->CurrentPackage(); + }; + + +# +# Function: CurrentPackage +# +# Returns the current calculated package or class, or undef if none. +# +sub CurrentPackage + { + my ($self) = @_; + + my $package; + + for (my $index = scalar @{$self->[SCOPE_STACK]} - 1; $index >= 0 && !defined $package; $index--) + { + $package = $self->[SCOPE_STACK]->[$index]->Package(); + }; + + return $package; + }; + + +# +# Function: SetPackage +# +# Sets the package for the current scope level. +# +# Parameters: +# +# package - The new package <SymbolString>. +# lineNumber - The line number the new package starts on. +# +sub SetPackage #(package, lineNumber) + { + my ($self, $package, $lineNumber) = @_; + $self->[SCOPE_STACK]->[-1]->SetPackage($package); + + $self->AddToScopeRecord($self->CurrentScope(), $lineNumber); + }; + + +# +# Function: CurrentUsing +# +# Returns the current calculated arrayref of <SymbolStrings> from Using statements, or undef if none. +# +sub CurrentUsing + { + my ($self) = @_; + return $self->[SCOPE_STACK]->[-1]->Using(); + }; + + +# +# Function: AddUsing +# +# Adds a Using <SymbolString> to the current scope. +# +sub AddUsing #(using) + { + my ($self, $using) = @_; + $self->[SCOPE_STACK]->[-1]->AddUsing($using); + }; + + + +############################################################################### +# Group: Support Functions + + +# +# Function: AddToScopeRecord +# +# Adds a change to the scope record, condensing unnecessary entries. +# +# Parameters: +# +# newScope - What the scope <SymbolString> changed to. +# lineNumber - Where the scope changed. +# +sub AddToScopeRecord #(newScope, lineNumber) + { + my ($self, $scope, $lineNumber) = @_; + my $scopeRecord = $self->ScopeRecord(); + + if ($scope ne $scopeRecord->[-1]->Scope()) + { + if ($scopeRecord->[-1]->LineNumber() == $lineNumber) + { $scopeRecord->[-1]->SetScope($scope); } + else + { push @$scopeRecord, NaturalDocs::Languages::Advanced::ScopeChange->New($scope, $lineNumber); }; + }; + }; + + +# +# Function: CreateString +# +# Converts the specified tokens into a string and returns it. +# +# Parameters: +# +# startIndex - The starting index to convert. +# endIndex - The ending index, which is *not inclusive*. +# +# Returns: +# +# The string. +# +sub CreateString #(startIndex, endIndex) + { + my ($self, $startIndex, $endIndex) = @_; + my $tokens = $self->Tokens(); + + my $string; + + while ($startIndex < $endIndex && $startIndex < scalar @$tokens) + { + $string .= $tokens->[$startIndex]; + $startIndex++; + }; + + return $string; + }; + + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/Advanced/Scope.pm b/docs/tool/Modules/NaturalDocs/Languages/Advanced/Scope.pm new file mode 100644 index 00000000..e1e50a95 --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Advanced/Scope.pm @@ -0,0 +1,95 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Advanced::Scope +# +############################################################################### +# +# A class used to store a scope level. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::Advanced::Scope; + +# +# Constants: Implementation +# +# The object is implemented as a blessed arrayref. The constants below are used as indexes. +# +# CLOSING_SYMBOL - The closing symbol character of the scope. +# PACKAGE - The package <SymbolString> of the scope. +# USING - An arrayref of <SymbolStrings> for using statements, or undef if none. +# +use NaturalDocs::DefineMembers 'CLOSING_SYMBOL', 'PACKAGE', 'USING'; +# Dependency: New() depends on the order of these constants as well as that there is no inherited members. + + +# +# Function: New +# +# Creates and returns a new object. +# +# Parameters: +# +# closingSymbol - The closing symbol character of the scope. +# package - The package <SymbolString> of the scope. +# using - An arrayref of using <SymbolStrings>, or undef if none. The contents of the array will be duplicated. +# +# If package is set to undef, it is assumed that it inherits the value of the previous scope on the stack. +# +sub New #(closingSymbol, package, using) + { + # Dependency: This depends on the order of the parameters matching the constants, and that there are no inherited + # members. + my $package = shift; + + my $object = [ @_ ]; + bless $object, $package; + + if (defined $object->[USING]) + { $object->[USING] = [ @{$object->[USING]} ]; }; + + return $object; + }; + + +# Function: ClosingSymbol +# Returns the closing symbol character of the scope. +sub ClosingSymbol + { return $_[0]->[CLOSING_SYMBOL]; }; + +# Function: Package +# Returns the package <SymbolString> of the scope, or undef if none. +sub Package + { return $_[0]->[PACKAGE]; }; + +# Function: SetPackage +# Sets the package <SymbolString> of the scope. +sub SetPackage #(package) + { $_[0]->[PACKAGE] = $_[1]; }; + +# Function: Using +# Returns an arrayref of <SymbolStrings> for using statements, or undef if none +sub Using + { return $_[0]->[USING]; }; + +# Function: AddUsing +# Adds a <SymbolString> to the <Using()> array. +sub AddUsing #(using) + { + my ($self, $using) = @_; + + if (!defined $self->[USING]) + { $self->[USING] = [ ]; }; + + push @{$self->[USING]}, $using; + }; + + + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/Advanced/ScopeChange.pm b/docs/tool/Modules/NaturalDocs/Languages/Advanced/ScopeChange.pm new file mode 100644 index 00000000..24de5503 --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Advanced/ScopeChange.pm @@ -0,0 +1,70 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Advanced::ScopeChange +# +############################################################################### +# +# A class used to store a scope change. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::Advanced::ScopeChange; + +# +# Constants: Implementation +# +# The object is implemented as a blessed arrayref. The constants below are used as indexes. +# +# SCOPE - The new scope <SymbolString>. +# LINE_NUMBER - The line number of the change. +# +use NaturalDocs::DefineMembers 'SCOPE', 'LINE_NUMBER'; +# Dependency: New() depends on the order of these constants as well as that there is no inherited members. + + +# +# Function: New +# +# Creates and returns a new object. +# +# Parameters: +# +# scope - The <SymbolString> the scope was changed to. +# lineNumber - What line it occurred on. +# +sub New #(scope, lineNumber) + { + # Dependency: This depends on the order of the parameters matching the constants, and that there are no inherited + # members. + my $self = shift; + + my $object = [ @_ ]; + bless $object, $self; + + return $object; + }; + + +# Function: Scope +# Returns the <SymbolString> the scope was changed to. +sub Scope + { return $_[0]->[SCOPE]; }; + +# Function: SetScope +# Replaces the <SymbolString> the scope was changed to. +sub SetScope #(scope) + { $_[0]->[SCOPE] = $_[1]; }; + +# Function: LineNumber +# Returns the line number of the change. +sub LineNumber + { return $_[0]->[LINE_NUMBER]; }; + + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/Base.pm b/docs/tool/Modules/NaturalDocs/Languages/Base.pm new file mode 100644 index 00000000..f89b7045 --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Base.pm @@ -0,0 +1,832 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Base +# +############################################################################### +# +# A base class for all programming language parsers. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::Base; + +use NaturalDocs::DefineMembers 'NAME', 'Name()', + 'EXTENSIONS', 'Extensions()', 'SetExtensions() duparrayref', + 'SHEBANG_STRINGS', 'ShebangStrings()', 'SetShebangStrings() duparrayref', + 'IGNORED_PREFIXES', + 'ENUM_VALUES'; + +use base 'Exporter'; +our @EXPORT = ('ENUM_GLOBAL', 'ENUM_UNDER_TYPE', 'ENUM_UNDER_PARENT'); + + +# +# Constants: EnumValuesType +# +# How enum values are handled in the language. +# +# ENUM_GLOBAL - Values are always global and thus 'value'. +# ENUM_UNDER_TYPE - Values are under the type in the hierarchy, and thus 'package.enum.value'. +# ENUM_UNDER_PARENT - Values are under the parent in the hierarchy, putting them on the same level as the enum itself. Thus +# 'package.value'. +# +use constant ENUM_GLOBAL => 1; +use constant ENUM_UNDER_TYPE => 2; +use constant ENUM_UNDER_PARENT => 3; + + +# +# Handle: SOURCEFILEHANDLE +# +# The handle of the source file currently being parsed. +# + + +# +# Function: New +# +# Creates and returns a new object. +# +# Parameters: +# +# name - The name of the language. +# +sub New #(name) + { + my ($selfPackage, $name) = @_; + + my $object = [ ]; + + $object->[NAME] = $name; + + bless $object, $selfPackage; + return $object; + }; + + +# +# Functions: Members +# +# Name - Returns the language's name. +# Extensions - Returns an arrayref of the language's file extensions, or undef if none. +# SetExtensions - Replaces the arrayref of the language's file extensions. +# ShebangStrings - Returns an arrayref of the language's shebang strings, or undef if none. +# SetShebangStrings - Replaces the arrayref of the language's shebang strings. +# + +# +# Function: PackageSeparator +# Returns the language's package separator string. +# +sub PackageSeparator + { return '.'; }; + +# +# Function: PackageSeparatorWasSet +# Returns whether the language's package separator string was ever changed from the default. +# +sub PackageSeparatorWasSet + { return 0; }; + + +# +# Function: EnumValues +# Returns the <EnumValuesType> that describes how the language handles enums. +# +sub EnumValues + { return ENUM_GLOBAL; }; + + +# +# Function: IgnoredPrefixesFor +# +# Returns an arrayref of ignored prefixes for the passed <TopicType>, or undef if none. The array is sorted so that the longest +# prefixes are first. +# +sub IgnoredPrefixesFor #(type) + { + my ($self, $type) = @_; + + if (defined $self->[IGNORED_PREFIXES]) + { return $self->[IGNORED_PREFIXES]->{$type}; } + else + { return undef; }; + }; + + +# +# Function: SetIgnoredPrefixesFor +# +# Replaces the arrayref of ignored prefixes for the passed <TopicType>. +# +sub SetIgnoredPrefixesFor #(type, prefixes) + { + my ($self, $type, $prefixesRef) = @_; + + if (!defined $self->[IGNORED_PREFIXES]) + { $self->[IGNORED_PREFIXES] = { }; }; + + if (!defined $prefixesRef) + { delete $self->[IGNORED_PREFIXES]->{$type}; } + else + { + my $prefixes = [ @$prefixesRef ]; + + # Sort prefixes to be longest to shortest. + @$prefixes = sort { length $b <=> length $a } @$prefixes; + + $self->[IGNORED_PREFIXES]->{$type} = $prefixes; + }; + }; + + +# +# Function: HasIgnoredPrefixes +# +# Returns whether the language has any ignored prefixes at all. +# +sub HasIgnoredPrefixes + { return defined $_[0]->[IGNORED_PREFIXES]; }; + + +# +# Function: CopyIgnoredPrefixesOf +# +# Copies all the ignored prefix settings of the passed <NaturalDocs::Languages::Base> object. +# +sub CopyIgnoredPrefixesOf #(language) + { + my ($self, $language) = @_; + + if ($language->HasIgnoredPrefixes()) + { + $self->[IGNORED_PREFIXES] = { }; + + while (my ($topicType, $prefixes) = each %{$language->[IGNORED_PREFIXES]}) + { + $self->[IGNORED_PREFIXES]->{$topicType} = [ @$prefixes ]; + }; + }; + }; + + + +############################################################################### +# Group: Parsing Functions + + +# +# Function: ParseFile +# +# Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>. +# This *must* be defined by a subclass. +# +# Parameters: +# +# sourceFile - The <FileName> of the source file to parse. +# topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file. +# +# Returns: +# +# The array ( autoTopics, scopeRecord ). +# +# autoTopics - An arrayref of automatically generated <NaturalDocs::Parser::ParsedTopics> from the file, or undef if none. +# scopeRecord - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChanges>, or undef if none. +# + + +# +# Function: ParsePrototype +# +# Parses the prototype and returns it as a <NaturalDocs::Languages::Prototype> object. +# +# Parameters: +# +# type - The <TopicType>. +# prototype - The text prototype. +# +# Returns: +# +# A <NaturalDocs::Languages::Prototype> object. +# +sub ParsePrototype #(type, prototype) + { + my ($self, $type, $prototype) = @_; + + my $isClass = NaturalDocs::Topics->TypeInfo($type)->ClassHierarchy(); + + if ($prototype !~ /\(.*[^ ].*\)/ && (!$isClass || $prototype !~ /\{.*[^ ].*\}/)) + { + my $object = NaturalDocs::Languages::Prototype->New($prototype); + return $object; + }; + + + # Parse the parameters out of the prototype. + + my @tokens = $prototype =~ /([^\(\)\[\]\{\}\<\>\'\"\,\;]+|.)/g; + + my $parameter; + my @parameterLines; + + my @symbolStack; + my $finishedParameters; + + my ($beforeParameters, $afterParameters); + + foreach my $token (@tokens) + { + if ($finishedParameters) + { $afterParameters .= $token; } + + elsif ($symbolStack[-1] eq '\'' || $symbolStack[-1] eq '"') + { + if ($symbolStack[0] eq '(' || ($isClass && $symbolStack[0] eq '{')) + { $parameter .= $token; } + else + { $beforeParameters .= $token; }; + + if ($token eq $symbolStack[-1]) + { pop @symbolStack; }; + } + + elsif ($token =~ /^[\(\[\{\<\'\"]$/) + { + if ($symbolStack[0] eq '(' || ($isClass && $symbolStack[0] eq '{')) + { $parameter .= $token; } + else + { $beforeParameters .= $token; }; + + push @symbolStack, $token; + } + + elsif ( ($token eq ')' && $symbolStack[-1] eq '(') || + ($token eq ']' && $symbolStack[-1] eq '[') || + ($token eq '}' && $symbolStack[-1] eq '{') || + ($token eq '>' && $symbolStack[-1] eq '<') ) + { + if ($symbolStack[0] eq '(') + { + if ($token eq ')' && scalar @symbolStack == 1) + { + if ($parameter ne ' ') + { push @parameterLines, $parameter; }; + + $finishedParameters = 1; + $afterParameters .= $token; + } + else + { $parameter .= $token; }; + } + elsif ($isClass && $symbolStack[0] eq '{') + { + if ($token eq '}' && scalar @symbolStack == 1) + { + if ($parameter ne ' ') + { push @parameterLines, $parameter; }; + + $finishedParameters = 1; + $afterParameters .= $token; + } + else + { $parameter .= $token; }; + } + else + { + $beforeParameters .= $token; + }; + + pop @symbolStack; + } + + elsif ($token eq ',' || $token eq ';') + { + if ($symbolStack[0] eq '(' || ($isClass && $symbolStack[0] eq '{')) + { + if (scalar @symbolStack == 1) + { + push @parameterLines, $parameter . $token; + $parameter = undef; + } + else + { + $parameter .= $token; + }; + } + else + { + $beforeParameters .= $token; + }; + } + + else + { + if ($symbolStack[0] eq '(' || ($isClass && $symbolStack[0] eq '{')) + { $parameter .= $token; } + else + { $beforeParameters .= $token; }; + }; + }; + + foreach my $part (\$beforeParameters, \$afterParameters) + { + $$part =~ s/^ //; + $$part =~ s/ $//; + }; + + my $prototypeObject = NaturalDocs::Languages::Prototype->New($beforeParameters, $afterParameters); + + + # Parse the actual parameters. + + foreach my $parameterLine (@parameterLines) + { + $prototypeObject->AddParameter( $self->ParseParameterLine($parameterLine) ); + }; + + return $prototypeObject; + }; + + +# +# Function: ParseParameterLine +# +# Parses a prototype parameter line and returns it as a <NaturalDocs::Languages::Prototype::Parameter> object. +# +# This vesion assumes a C++ style line. If you need a Pascal style line, override this function to forward to +# <ParsePascalParameterLine()>. +# +# > Function(parameter, type parameter, type parameter = value); +# +sub ParseParameterLine #(line) + { + my ($self, $line) = @_; + + $line =~ s/^ //; + $line =~ s/ $//; + + my @tokens = $line =~ /([^ \(\)\{\}\[\]\<\>\'\"\=]+|.)/g; + + my @symbolStack; + my @parameterWords = ( undef ); + my ($defaultValue, $defaultValuePrefix, $inDefaultValue); + + foreach my $token (@tokens) + { + if ($inDefaultValue) + { $defaultValue .= $token; } + + elsif ($symbolStack[-1] eq '\'' || $symbolStack[-1] eq '"') + { + $parameterWords[-1] .= $token; + + if ($token eq $symbolStack[-1]) + { pop @symbolStack; }; + } + + elsif ($token =~ /^[\(\[\{\<\'\"]$/) + { + push @symbolStack, $token; + $parameterWords[-1] .= $token; + } + + elsif ( ($token eq ')' && $symbolStack[-1] eq '(') || + ($token eq ']' && $symbolStack[-1] eq '[') || + ($token eq '}' && $symbolStack[-1] eq '{') || + ($token eq '>' && $symbolStack[-1] eq '<') ) + { + pop @symbolStack; + $parameterWords[-1] .= $token; + } + + elsif ($token eq ' ') + { + if (!scalar @symbolStack) + { push @parameterWords, undef; } + else + { $parameterWords[-1] .= $token; }; + } + + elsif ($token eq '=') + { + if (!scalar @symbolStack) + { + $defaultValuePrefix = $token; + $inDefaultValue = 1; + } + else + { $parameterWords[-1] .= $token; }; + } + + else + { + $parameterWords[-1] .= $token; + }; + }; + + my ($name, $namePrefix, $type, $typePrefix); + + if (!$parameterWords[-1]) + { pop @parameterWords; }; + + $name = pop @parameterWords; + + if ($parameterWords[-1]=~ /([\*\&]+)$/) + { + $namePrefix = $1; + $parameterWords[-1] = substr($parameterWords[-1], 0, 0 - length($namePrefix)); + $parameterWords[-1] =~ s/ $//; + + if (!$parameterWords[-1]) + { pop @parameterWords; }; + } + elsif ($name =~ /^([\*\&]+)/) + { + $namePrefix = $1; + $name = substr($name, length($namePrefix)); + $name =~ s/^ //; + }; + + $type = pop @parameterWords; + $typePrefix = join(' ', @parameterWords); + + if ($typePrefix) + { $typePrefix .= ' '; }; + + if ($type =~ /^([a-z0-9_\:\.]+(?:\.|\:\:))[a-z0-9_]/i) + { + my $attachedTypePrefix = $1; + + $typePrefix .= $attachedTypePrefix; + $type = substr($type, length($attachedTypePrefix)); + }; + + $defaultValue =~ s/ $//; + + return NaturalDocs::Languages::Prototype::Parameter->New($type, $typePrefix, $name, $namePrefix, + $defaultValue, $defaultValuePrefix); + }; + + +# +# Function: ParsePascalParameterLine +# +# Parses a Pascal-like prototype parameter line and returns it as a <NaturalDocs::Languages::Prototype::Parameter> object. +# Pascal lines are as follows: +# +# > Function (name: type; name, name: type := value) +# +# Also supports ActionScript lines +# +# > Function (name: type, name, name: type = value) +# +sub ParsePascalParameterLine #(line) + { + my ($self, $line) = @_; + + $line =~ s/^ //; + $line =~ s/ $//; + + my @tokens = $line =~ /([^\(\)\{\}\[\]\<\>\'\"\=\:]+|\:\=|.)/g; + my ($type, $name, $defaultValue, $defaultValuePrefix, $afterName, $afterDefaultValue); + my @symbolStack; + + foreach my $token (@tokens) + { + if ($afterDefaultValue) + { $defaultValue .= $token; } + + elsif ($symbolStack[-1] eq '\'' || $symbolStack[-1] eq '"') + { + if ($afterName) + { $type .= $token; } + else + { $name .= $token; }; + + if ($token eq $symbolStack[-1]) + { pop @symbolStack; }; + } + + elsif ($token =~ /^[\(\[\{\<\'\"]$/) + { + push @symbolStack, $token; + + if ($afterName) + { $type .= $token; } + else + { $name .= $token; }; + } + + elsif ( ($token eq ')' && $symbolStack[-1] eq '(') || + ($token eq ']' && $symbolStack[-1] eq '[') || + ($token eq '}' && $symbolStack[-1] eq '{') || + ($token eq '>' && $symbolStack[-1] eq '<') ) + { + pop @symbolStack; + + if ($afterName) + { $type .= $token; } + else + { $name .= $token; }; + } + + elsif ($afterName) + { + if (($token eq ':=' || $token eq '=') && !scalar @symbolStack) + { + $defaultValuePrefix = $token; + $afterDefaultValue = 1; + } + else + { $type .= $token; }; + } + + elsif ($token eq ':' && !scalar @symbolStack) + { + $name .= $token; + $afterName = 1; + } + + else + { $name .= $token; }; + }; + + foreach my $part (\$type, \$name, \$defaultValue) + { + $$part =~ s/^ //; + $$part =~ s/ $//; + }; + + return NaturalDocs::Languages::Prototype::Parameter->New($type, undef, $name, undef, $defaultValue, $defaultValuePrefix); + }; + + +# +# Function: TypeBeforeParameter +# +# Returns whether the type appears before the parameter in prototypes. +# +# For example, it does in C++ +# > void Function (int a, int b) +# +# but does not in Pascal +# > function Function (a: int; b, c: int) +# +sub TypeBeforeParameter + { + return 1; + }; + + + +# +# Function: IgnoredPrefixLength +# +# Returns the length of the prefix that should be ignored in the index, or zero if none. +# +# Parameters: +# +# name - The name of the symbol. +# type - The symbol's <TopicType>. +# +# Returns: +# +# The length of the prefix to ignore, or zero if none. +# +sub IgnoredPrefixLength #(name, type) + { + my ($self, $name, $type) = @_; + + foreach my $prefixes ($self->IgnoredPrefixesFor($type), $self->IgnoredPrefixesFor(::TOPIC_GENERAL())) + { + if (defined $prefixes) + { + foreach my $prefix (@$prefixes) + { + if (substr($name, 0, length($prefix)) eq $prefix) + { return length($prefix); }; + }; + }; + }; + + return 0; + }; + + + +############################################################################### +# Group: Support Functions + + +# +# Function: StripOpeningSymbols +# +# Determines if the line starts with any of the passed symbols, and if so, replaces it with spaces. This only happens +# if the only thing before it on the line is whitespace. +# +# Parameters: +# +# lineRef - A reference to the line to check. +# symbols - An arrayref of the symbols to check for. +# +# Returns: +# +# If the line starts with any of the passed comment symbols, it will replace it in the line with spaces and return the symbol. +# If the line doesn't, it will leave the line alone and return undef. +# +sub StripOpeningSymbols #(lineRef, symbols) + { + my ($self, $lineRef, $symbols) = @_; + + if (!defined $symbols) + { return undef; }; + + my ($index, $symbol) = ::FindFirstSymbol($$lineRef, $symbols); + + if ($index != -1 && substr($$lineRef, 0, $index) =~ /^[ \t]*$/) + { + return substr($$lineRef, $index, length($symbol), ' ' x length($symbol)); + }; + + return undef; + }; + + +# +# Function: StripOpeningJavaDocSymbols +# +# Determines if the line starts with any of the passed symbols, and if so, replaces it with spaces. This only happens +# if the only thing before it on the line is whitespace and the next character after it is whitespace or the end of the line. +# +# Parameters: +# +# lineRef - A reference to the line to check. +# symbols - An arrayref of the symbols to check for. +# +# Returns: +# +# If the line starts with any of the passed comment symbols, it will replace it in the line with spaces and return the symbol. +# If the line doesn't, it will leave the line alone and return undef. +# +sub StripOpeningJavaDocSymbols #(lineRef, symbols) + { + my ($self, $lineRef, $symbols) = @_; + + if (!defined $symbols) + { return undef; }; + + my ($index, $symbol) = ::FindFirstSymbol($$lineRef, $symbols); + + if ($index != -1 && substr($$lineRef, 0, $index) =~ /^[ \t]*$/ && substr($$lineRef, $index + length($symbol), 1) =~ /^[ \t]?$/) + { + return substr($$lineRef, $index, length($symbol), ' ' x length($symbol)); + }; + + return undef; + }; + + +# +# Function: StripOpeningBlockSymbols +# +# Determines if the line starts with any of the opening symbols in the passed symbol pairs, and if so, replaces it with spaces. +# This only happens if the only thing before it on the line is whitespace. +# +# Parameters: +# +# lineRef - A reference to the line to check. +# symbolPairs - An arrayref of the symbol pairs to check for. Pairs are specified as two consecutive array entries, with the +# opening symbol first. +# +# Returns: +# +# If the line starts with any of the opening symbols, it will replace it in the line with spaces and return the closing symbol. +# If the line doesn't, it will leave the line alone and return undef. +# +sub StripOpeningBlockSymbols #(lineRef, symbolPairs) + { + my ($self, $lineRef, $symbolPairs) = @_; + + if (!defined $symbolPairs) + { return undef; }; + + for (my $i = 0; $i < scalar @$symbolPairs; $i += 2) + { + my $index = index($$lineRef, $symbolPairs->[$i]); + + if ($index != -1 && substr($$lineRef, 0, $index) =~ /^[ \t]*$/) + { + substr($$lineRef, $index, length($symbolPairs->[$i]), ' ' x length($symbolPairs->[$i])); + return $symbolPairs->[$i + 1]; + }; + }; + + return undef; + }; + + +# +# Function: StripOpeningJavaDocBlockSymbols +# +# Determines if the line starts with any of the opening symbols in the passed symbol pairs, and if so, replaces it with spaces. +# This only happens if the only thing before it on the line is whitespace and the next character is whitespace or the end of the line. +# +# Parameters: +# +# lineRef - A reference to the line to check. +# symbolPairs - An arrayref of the symbol pairs to check for. Pairs are specified as two consecutive array entries, with the +# opening symbol first. +# +# Returns: +# +# If the line starts with any of the opening symbols, it will replace it in the line with spaces and return the closing symbol. +# If the line doesn't, it will leave the line alone and return undef. +# +sub StripOpeningJavaDocBlockSymbols #(lineRef, symbolPairs) + { + my ($self, $lineRef, $symbolPairs) = @_; + + if (!defined $symbolPairs) + { return undef; }; + + for (my $i = 0; $i < scalar @$symbolPairs; $i += 2) + { + my $index = index($$lineRef, $symbolPairs->[$i]); + + if ($index != -1 && substr($$lineRef, 0, $index) =~ /^[ \t]*$/ && + substr($$lineRef, $index + length($symbolPairs->[$i]), 1) =~ /^[ \t]?$/) + { + substr($$lineRef, $index, length($symbolPairs->[$i]), ' ' x length($symbolPairs->[$i])); + return $symbolPairs->[$i + 1]; + }; + }; + + return undef; + }; + + +# +# Function: StripClosingSymbol +# +# Determines if the line contains a symbol, and if so, truncates it just before the symbol. +# +# Parameters: +# +# lineRef - A reference to the line to check. +# symbol - The symbol to check for. +# +# Returns: +# +# The remainder of the line, or undef if the symbol was not found. +# +sub StripClosingSymbol #(lineRef, symbol) + { + my ($self, $lineRef, $symbol) = @_; + + my $index = index($$lineRef, $symbol); + + if ($index != -1) + { + my $lineRemainder = substr($$lineRef, $index + length($symbol)); + $$lineRef = substr($$lineRef, 0, $index); + + return $lineRemainder; + } + else + { return undef; }; + }; + + +# +# Function: NormalizePrototype +# +# Normalizes a prototype. Specifically, condenses spaces, tabs, and line breaks into single spaces and removes leading and +# trailing ones. +# +# Parameters: +# +# prototype - The original prototype string. +# +# Returns: +# +# The normalized prototype. +# +sub NormalizePrototype #(prototype) + { + my ($self, $prototype) = @_; + + $prototype =~ tr/ \t\r\n/ /s; + $prototype =~ s/^ //; + $prototype =~ s/ $//; + + return $prototype; + }; + + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/CSharp.pm b/docs/tool/Modules/NaturalDocs/Languages/CSharp.pm new file mode 100644 index 00000000..5bcd50be --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/CSharp.pm @@ -0,0 +1,1484 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::CSharp +# +############################################################################### +# +# A subclass to handle the language variations of C#. +# +# +# Topic: Language Support +# +# Supported: +# +# - Classes +# - Namespaces (no topic generated) +# - Functions +# - Constructors and Destructors +# - Properties +# - Indexers +# - Operators +# - Delegates +# - Variables +# - Constants +# - Events +# - Enums +# +# Not supported yet: +# +# - Autodocumenting enum members +# - Using alias +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::CSharp; + +use base 'NaturalDocs::Languages::Advanced'; + + +############################################################################### +# Group: Package Variables + +# +# hash: classKeywords +# An existence hash of all the acceptable class keywords. The keys are in all lowercase. +# +my %classKeywords = ( 'class' => 1, + 'struct' => 1, + 'interface' => 1 ); + +# +# hash: classModifiers +# An existence hash of all the acceptable class modifiers. The keys are in all lowercase. +# +my %classModifiers = ( 'new' => 1, + 'public' => 1, + 'protected' => 1, + 'internal' => 1, + 'private' => 1, + 'abstract' => 1, + 'sealed' => 1, + 'unsafe' => 1, + 'static' => 1 ); + +# +# hash: functionModifiers +# An existence hash of all the acceptable function modifiers. Also applies to properties. Also encompasses those for operators +# and indexers, but have more than are valid for them. The keys are in all lowercase. +# +my %functionModifiers = ( 'new' => 1, + 'public' => 1, + 'protected' => 1, + 'internal' => 1, + 'private' => 1, + 'static' => 1, + 'virtual' => 1, + 'sealed' => 1, + 'override' => 1, + 'abstract' => 1, + 'extern' => 1, + 'unsafe' => 1 ); + +# +# hash: variableModifiers +# An existence hash of all the acceptable variable modifiers. The keys are in all lowercase. +# +my %variableModifiers = ( 'new' => 1, + 'public' => 1, + 'protected' => 1, + 'internal' => 1, + 'private' => 1, + 'static' => 1, + 'readonly' => 1, + 'volatile' => 1, + 'unsafe' => 1 ); + +# +# hash: enumTypes +# An existence hash of all the possible enum types. The keys are in all lowercase. +# +my %enumTypes = ( 'sbyte' => 1, + 'byte' => 1, + 'short' => 1, + 'ushort' => 1, + 'int' => 1, + 'uint' => 1, + 'long' => 1, + 'ulong' => 1 ); + +# +# hash: impossibleTypeWords +# An existence hash of all the reserved words that cannot be in a type. This includes 'enum' and all modifiers. The keys are in +# all lowercase. +# +my %impossibleTypeWords = ( 'abstract' => 1, 'as' => 1, 'base' => 1, 'break' => 1, 'case' => 1, 'catch' => 1, + 'checked' => 1, 'class' => 1, 'const' => 1, 'continue' => 1, 'default' => 1, 'delegate' => 1, + 'do' => 1, 'else' => 1, 'enum' => 1, 'event' => 1, 'explicit' => 1, 'extern' => 1, + 'false' => 1, 'finally' => 1, 'fixed' => 1, 'for' => 1, 'foreach' => 1, 'goto' => 1, 'if' => 1, + 'implicit' => 1, 'in' => 1, 'interface' => 1, 'internal' => 1, 'is' => 1, 'lock' => 1, + 'namespace' => 1, 'new' => 1, 'null' => 1, 'operator' => 1, 'out' => 1, 'override' => 1, + 'params' => 1, 'private' => 1, 'protected' => 1, 'public' => 1, 'readonly' => 1, 'ref' => 1, + 'return' => 1, 'sealed' => 1, 'sizeof' => 1, 'stackalloc' => 1, 'static' => 1, + 'struct' => 1, 'switch' => 1, 'this' => 1, 'throw' => 1, 'true' => 1, 'try' => 1, 'typeof' => 1, + 'unchecked' => 1, 'unsafe' => 1, 'using' => 1, 'virtual' => 1, 'volatile' => 1, 'while' => 1 ); +# Deleted from the list: object, string, bool, decimal, sbyte, byte, short, ushort, int, uint, long, ulong, char, float, double, void + + + +############################################################################### +# Group: Interface Functions + + +# +# Function: PackageSeparator +# Returns the package separator symbol. +# +sub PackageSeparator + { return '.'; }; + + +# +# Function: EnumValues +# Returns the <EnumValuesType> that describes how the language handles enums. +# +sub EnumValues + { return ::ENUM_UNDER_TYPE(); }; + + +# +# Function: ParseFile +# +# Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>. +# +# Parameters: +# +# sourceFile - The <FileName> to parse. +# topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file. +# +# Returns: +# +# The array ( autoTopics, scopeRecord ). +# +# autoTopics - An arrayref of automatically generated topics from the file, or undef if none. +# scopeRecord - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChanges>, or undef if none. +# +sub ParseFile #(sourceFile, topicsList) + { + my ($self, $sourceFile, $topicsList) = @_; + + $self->ParseForCommentsAndTokens($sourceFile, [ '//' ], [ '/*', '*/' ], [ '///' ], [ '/**', '*/' ] ); + + my $tokens = $self->Tokens(); + my $index = 0; + my $lineNumber = 1; + + while ($index < scalar @$tokens) + { + if ($self->TryToSkipWhitespace(\$index, \$lineNumber) || + $self->TryToGetNamespace(\$index, \$lineNumber) || + $self->TryToGetUsing(\$index, \$lineNumber) || + $self->TryToGetClass(\$index, \$lineNumber) || + $self->TryToGetFunction(\$index, \$lineNumber) || + $self->TryToGetOverloadedOperator(\$index, \$lineNumber) || + $self->TryToGetVariable(\$index, \$lineNumber) || + $self->TryToGetEnum(\$index, \$lineNumber) ) + { + # The functions above will handle everything. + } + + elsif ($tokens->[$index] eq '{') + { + $self->StartScope('}', $lineNumber, undef, undef, undef); + $index++; + } + + elsif ($tokens->[$index] eq '}') + { + if ($self->ClosingScopeSymbol() eq '}') + { $self->EndScope($lineNumber); }; + + $index++; + } + + else + { + $self->SkipRestOfStatement(\$index, \$lineNumber); + }; + }; + + + # Don't need to keep these around. + $self->ClearTokens(); + + + my $autoTopics = $self->AutoTopics(); + + my $scopeRecord = $self->ScopeRecord(); + if (defined $scopeRecord && !scalar @$scopeRecord) + { $scopeRecord = undef; }; + + return ( $autoTopics, $scopeRecord ); + }; + + + +############################################################################### +# Group: Statement Parsing Functions +# All functions here assume that the current position is at the beginning of a statement. +# +# Note for developers: I am well aware that the code in these functions do not check if we're past the end of the tokens as +# often as it should. We're making use of the fact that Perl will always return undef in these cases to keep the code simpler. + + +# +# Function: TryToGetNamespace +# +# Determines whether the position is at a namespace declaration statement, and if so, adjusts the scope, skips it, and returns +# true. +# +# Why no topic?: +# +# The main reason we don't create a Natural Docs topic for a namespace is because in order to declare class A.B.C in C#, +# you must do this: +# +# > namespace A.B +# > { +# > class C +# > { ... } +# > } +# +# That would result in a namespace topic whose only purpose is really to qualify C. It would take the default page title, and +# thus the default menu title. So if you have files for A.B.X, A.B.Y, and A.B.Z, they all will appear as A.B on the menu. +# +# If something actually appears in the namespace besides a class, it will be handled by +# <NaturalDocs::Parser->AddPackageDelineators()>. That function will add a package topic to correct the scope. +# +# If the user actually documented it, it will still appear because of the manual topic. +# +sub TryToGetNamespace #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + if (lc($tokens->[$$indexRef]) ne 'namespace') + { return undef; }; + + my $index = $$indexRef + 1; + my $lineNumber = $$lineNumberRef; + + if (!$self->TryToSkipWhitespace(\$index, \$lineNumber)) + { return undef; }; + + my $name; + + while ($tokens->[$index] =~ /^[a-z_\.\@]/i) + { + $name .= $tokens->[$index]; + $index++; + }; + + if (!defined $name) + { return undef; }; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if ($tokens->[$index] ne '{') + { return undef; }; + + $index++; + + + # We found a valid one if we made it this far. + + my $autoTopic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_CLASS(), $name, + $self->CurrentScope(), $self->CurrentUsing(), + undef, + undef, undef, $$lineNumberRef); + + # We don't add an auto-topic for namespaces. See the function documentation above. + + NaturalDocs::Parser->OnClass($autoTopic->Package()); + + $self->StartScope('}', $lineNumber, $autoTopic->Package()); + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return 1; + }; + + +# +# Function: TryToGetClass +# +# Determines whether the position is at a class declaration statement, and if so, generates a topic for it, skips it, and +# returns true. +# +# Supported Syntaxes: +# +# - Classes +# - Structs +# - Interfaces +# +sub TryToGetClass #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + my $startIndex = $index; + my $startLine = $lineNumber; + my $needsPrototype = 0; + + if ($self->TryToSkipAttributes(\$index, \$lineNumber)) + { $self->TryToSkipWhitespace(\$index, \$lineNumber); } + + my @modifiers; + + while ($tokens->[$index] =~ /^[a-z]/i && + !exists $classKeywords{lc($tokens->[$index])} && + exists $classModifiers{lc($tokens->[$index])} ) + { + push @modifiers, lc($tokens->[$index]); + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + + if (!exists $classKeywords{lc($tokens->[$index])}) + { return undef; }; + + my $lcClassKeyword = lc($tokens->[$index]); + + $index++; + + if (!$self->TryToSkipWhitespace(\$index, \$lineNumber)) + { return undef; }; + + my $name; + + while ($tokens->[$index] =~ /^[a-z_\@]/i) + { + $name .= $tokens->[$index]; + $index++; + }; + + if (!defined $name) + { return undef; }; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if ($tokens->[$index] eq '<') + { + # XXX: This is half-assed. + $index++; + $needsPrototype = 1; + + while ($index < scalar @$tokens && $tokens->[$index] ne '>') + { + $index++; + } + + if ($index < scalar @$tokens) + { + $index++; + } + else + { return undef; } + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + + my @parents; + + if ($tokens->[$index] eq ':') + { + do + { + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my $parentName; + + while ($tokens->[$index] =~ /^[a-z_\.\@]/i) + { + $parentName .= $tokens->[$index]; + $index++; + }; + + if ($tokens->[$index] eq '<') + { + # XXX: This is still half-assed. + $index++; + $needsPrototype = 1; + + while ($index < scalar @$tokens && $tokens->[$index] ne '>') + { + $index++; + } + + if ($index < scalar @$tokens) + { + $index++; + } + else + { return undef; } + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + + if (!defined $parentName) + { return undef; }; + + push @parents, NaturalDocs::SymbolString->FromText($parentName); + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + while ($tokens->[$index] eq ',') + }; + + if (lc($tokens->[$index]) eq 'where') + { + # XXX: This is also half-assed + $index++; + + while ($index < scalar @$tokens && $tokens->[$index] ne '{') + { + $index++; + } + } + + if ($tokens->[$index] ne '{') + { return undef; }; + + + # If we made it this far, we have a valid class declaration. + + my @scopeIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($self->CurrentScope()); + $name = join('.', @scopeIdentifiers, $name); + + my $topicType; + + if ($lcClassKeyword eq 'interface') + { $topicType = ::TOPIC_INTERFACE(); } + else + { $topicType = ::TOPIC_CLASS(); }; + + my $prototype; + + if ($needsPrototype) + { + $prototype = $self->CreateString($startIndex, $index); + } + + my $autoTopic = NaturalDocs::Parser::ParsedTopic->New($topicType, $name, + undef, $self->CurrentUsing(), + $prototype, + undef, undef, $$lineNumberRef); + + $self->AddAutoTopic($autoTopic); + NaturalDocs::Parser->OnClass($autoTopic->Package()); + + foreach my $parent (@parents) + { + NaturalDocs::Parser->OnClassParent($autoTopic->Package(), $parent, $self->CurrentScope(), undef, + ::RESOLVE_RELATIVE()); + }; + + $self->StartScope('}', $lineNumber, $autoTopic->Package()); + + $index++; + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return 1; + }; + + +# +# Function: TryToGetUsing +# +# Determines whether the position is at a using statement, and if so, adds it to the current scope, skips it, and returns +# true. +# +# Supported: +# +# - Using +# +# Unsupported: +# +# - Using with alias +# +sub TryToGetUsing #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + if (lc($tokens->[$index]) ne 'using') + { return undef; }; + + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my $name; + + while ($tokens->[$index] =~ /^[a-z_\@\.]/i) + { + $name .= $tokens->[$index]; + $index++; + }; + + if ($tokens->[$index] ne ';' || + !defined $name) + { return undef; }; + + $index++; + + + $self->AddUsing( NaturalDocs::SymbolString->FromText($name) ); + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return 1; + }; + + + +# +# Function: TryToGetFunction +# +# Determines if the position is on a function declaration, and if so, generates a topic for it, skips it, and returns true. +# +# Supported Syntaxes: +# +# - Functions +# - Constructors +# - Destructors +# - Properties +# - Indexers +# - Delegates +# - Events +# +sub TryToGetFunction #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + if ($self->TryToSkipAttributes(\$index, \$lineNumber)) + { $self->TryToSkipWhitespace(\$index, \$lineNumber); }; + + my $startIndex = $index; + my $startLine = $lineNumber; + + my @modifiers; + + while ($tokens->[$index] =~ /^[a-z]/i && + exists $functionModifiers{lc($tokens->[$index])} ) + { + push @modifiers, lc($tokens->[$index]); + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + + my $isDelegate; + my $isEvent; + + if (lc($tokens->[$index]) eq 'delegate') + { + $isDelegate = 1; + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + elsif (lc($tokens->[$index]) eq 'event') + { + $isEvent = 1; + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + + my $returnType = $self->TryToGetType(\$index, \$lineNumber); + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my $name; + my $lastNameWord; + + while ($tokens->[$index] =~ /^[a-z\_\@\.\~\<]/i) + { + $name .= $tokens->[$index]; + + # Ugly hack, but what else is new? For explicit generic interface definitions, such as: + # IDObjectType System.Collections.Generic.IEnumerator<IDObjectType>.Current + + if ($tokens->[$index] eq '<') + { + do + { + $index++; + $name .= $tokens->[$index]; + } + while ($index < @$tokens && $tokens->[$index] ne '>'); + } + + $lastNameWord = $tokens->[$index]; + $index++; + }; + + if (!defined $name) + { + # Constructors and destructors don't have return types. It's possible their names were mistaken for the return type. + if (defined $returnType) + { + $name = $returnType; + $returnType = undef; + + $name =~ /([a-z0-9_]+)$/i; + $lastNameWord = $1; + } + else + { return undef; }; + }; + + # If there's no return type, make sure it's a constructor or destructor. + if (!defined $returnType) + { + my @identifiers = NaturalDocs::SymbolString->IdentifiersOf( $self->CurrentScope() ); + + if ($lastNameWord ne $identifiers[-1]) + { return undef; }; + }; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + + # Skip the brackets on indexers. + if ($tokens->[$index] eq '[' && lc($lastNameWord) eq 'this') + { + # This should jump the brackets completely. + $self->GenericSkip(\$index, \$lineNumber); + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + $name .= '[]'; + }; + + + # Properties, indexers, events with braces + + if ($tokens->[$index] eq '{') + { + my $prototype = $self->CreateString($startIndex, $index); + + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my ($aWord, $bWord, $hasA, $hasB); + + if ($isEvent) + { + $aWord = 'add'; + $bWord = 'remove'; + } + else + { + $aWord = 'get'; + $bWord = 'set'; + }; + + while ($index < scalar @$tokens) + { + if ($self->TryToSkipAttributes(\$index, \$lineNumber)) + { $self->TryToSkipWhitespace(\$index, \$lineNumber); }; + + if (lc($tokens->[$index]) eq $aWord) + { $hasA = 1; } + elsif (lc($tokens->[$index]) eq $bWord) + { $hasB = 1; } + elsif ($tokens->[$index] eq '}') + { + $index++; + last; + }; + + $self->SkipRestOfStatement(\$index, \$lineNumber); + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + + if ($hasA && $hasB) + { $prototype .= ' { ' . $aWord . ', ' . $bWord . ' }'; } + elsif ($hasA) + { $prototype .= ' { ' . $aWord . ' }'; } + elsif ($hasB) + { $prototype .= ' { ' . $bWord . ' }'; }; + + $prototype = $self->NormalizePrototype($prototype); + + my $topicType = ( $isEvent ? ::TOPIC_EVENT() : ::TOPIC_PROPERTY() ); + + $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New($topicType, $name, + $self->CurrentScope(), $self->CurrentUsing(), + $prototype, + undef, undef, $startLine)); + } + + + # Functions, constructors, destructors, delegates. + + elsif ($tokens->[$index] eq '(') + { + # This should jump the parenthesis completely. + $self->GenericSkip(\$index, \$lineNumber); + + my $topicType = ( $isDelegate ? ::TOPIC_DELEGATE() : ::TOPIC_FUNCTION() ); + my $prototype = $self->NormalizePrototype( $self->CreateString($startIndex, $index) ); + + $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New($topicType, $name, + $self->CurrentScope(), $self->CurrentUsing(), + $prototype, + undef, undef, $startLine)); + + $self->SkipRestOfStatement(\$index, \$lineNumber); + } + + + # Events without braces + + elsif ($isEvent && $tokens->[$index] eq ';') + { + my $prototype = $self->NormalizePrototype( $self->CreateString($startIndex, $index) ); + + $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_EVENT(), $name, + $self->CurrentScope(), $self->CurrentUsing(), + $prototype, + undef, undef, $startLine)); + $index++; + } + + else + { return undef; }; + + + # We succeeded if we got this far. + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return 1; + }; + + +# +# Function: TryToGetOverloadedOperator +# +# Determines if the position is on an operator overload declaration, and if so, generates a topic for it, skips it, and returns true. +# +sub TryToGetOverloadedOperator #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + if ($self->TryToSkipAttributes(\$index, \$lineNumber)) + { $self->TryToSkipWhitespace(\$index, \$lineNumber); }; + + my $startIndex = $index; + my $startLine = $lineNumber; + + my @modifiers; + + while ($tokens->[$index] =~ /^[a-z]/i && + exists $functionModifiers{lc($tokens->[$index])} ) + { + push @modifiers, lc($tokens->[$index]); + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + + + my $name; + + + # Casting operators. + + if (lc($tokens->[$index]) eq 'implicit' || lc($tokens->[$index]) eq 'explicit') + { + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if (lc($tokens->[$index]) ne 'operator') + { return undef; }; + + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + $name = $self->TryToGetType(\$index, \$lineNumber); + + if (!defined $name) + { return undef; }; + } + + + # Symbol operators. + + else + { + if (!$self->TryToGetType(\$index, \$lineNumber)) + { return undef; }; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if (lc($tokens->[$index]) ne 'operator') + { return undef; }; + + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if (lc($tokens->[$index]) eq 'true' || lc($tokens->[$index]) eq 'false') + { + $name = $tokens->[$index]; + $index++; + } + else + { + while ($tokens->[$index] =~ /^[\+\-\!\~\*\/\%\&\|\^\<\>\=]$/) + { + $name .= $tokens->[$index]; + $index++; + }; + }; + }; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if ($tokens->[$index] ne '(') + { return undef; }; + + # This should skip the parenthesis completely. + $self->GenericSkip(\$index, \$lineNumber); + + my $prototype = $self->NormalizePrototype( $self->CreateString($startIndex, $index) ); + + $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FUNCTION(), 'operator ' . $name, + $self->CurrentScope(), $self->CurrentUsing(), + $prototype, + undef, undef, $startLine)); + + $self->SkipRestOfStatement(\$index, \$lineNumber); + + + # We succeeded if we got this far. + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return 1; + }; + + +# +# Function: TryToGetVariable +# +# Determines if the position is on a variable declaration statement, and if so, generates a topic for each variable, skips the +# statement, and returns true. +# +# Supported Syntaxes: +# +# - Variables +# - Constants +# +sub TryToGetVariable #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + if ($self->TryToSkipAttributes(\$index, \$lineNumber)) + { $self->TryToSkipWhitespace(\$index, \$lineNumber); }; + + my $startIndex = $index; + my $startLine = $lineNumber; + + my @modifiers; + + while ($tokens->[$index] =~ /^[a-z]/i && + exists $variableModifiers{lc($tokens->[$index])} ) + { + push @modifiers, lc($tokens->[$index]); + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + + my $type; + if (lc($tokens->[$index]) eq 'const') + { + $type = ::TOPIC_CONSTANT(); + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + else + { + $type = ::TOPIC_VARIABLE(); + }; + + if (!$self->TryToGetType(\$index, \$lineNumber)) + { return undef; }; + + my $endTypeIndex = $index; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my @names; + + for (;;) + { + my $name; + + while ($tokens->[$index] =~ /^[a-z\@\_]/i) + { + $name .= $tokens->[$index]; + $index++; + }; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if ($tokens->[$index] eq '=') + { + do + { + $self->GenericSkip(\$index, \$lineNumber); + } + while ($tokens->[$index] ne ',' && $tokens->[$index] ne ';'); + }; + + push @names, $name; + + if ($tokens->[$index] eq ';') + { + $index++; + last; + } + elsif ($tokens->[$index] eq ',') + { + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + else + { return undef; }; + }; + + + # We succeeded if we got this far. + + my $prototypePrefix = $self->CreateString($startIndex, $endTypeIndex); + + foreach my $name (@names) + { + my $prototype = $self->NormalizePrototype( $prototypePrefix . ' ' . $name ); + + $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New($type, $name, + $self->CurrentScope(), $self->CurrentUsing(), + $prototype, + undef, undef, $startLine)); + }; + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return 1; + }; + + +# +# Function: TryToGetEnum +# +# Determines if the position is on an enum declaration statement, and if so, generates a topic for it. +# +# Supported Syntaxes: +# +# - Enums +# - Enums with declared types +# +# Unsupported: +# +# - Documenting the members automatically +# +sub TryToGetEnum #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + if ($self->TryToSkipAttributes(\$index, \$lineNumber)) + { $self->TryToSkipWhitespace(\$index, \$lineNumber); }; + + my $startIndex = $index; + my $startLine = $lineNumber; + + my @modifiers; + + while ($tokens->[$index] =~ /^[a-z]/i && + exists $variableModifiers{lc($tokens->[$index])} ) + { + push @modifiers, lc($tokens->[$index]); + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + + if (lc($tokens->[$index]) ne 'enum') + { return undef; } + + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my $name; + + while ($tokens->[$index] =~ /^[a-z\@\_]/i) + { + $name .= $tokens->[$index]; + $index++; + }; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if ($tokens->[$index] eq ':') + { + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if (!exists $enumTypes{ lc($tokens->[$index]) }) + { return undef; } + + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + + if ($tokens->[$index] ne '{') + { return undef; } + + # We succeeded if we got this far. + + my $prototype = $self->CreateString($startIndex, $index); + $prototype = $self->NormalizePrototype( $prototype ); + + $self->SkipRestOfStatement(\$index, \$lineNumber); + + $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_ENUMERATION(), $name, + $self->CurrentScope(), $self->CurrentUsing(), + $prototype, + undef, undef, $startLine)); + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return 1; + }; + + +# +# Function: TryToGetType +# +# Determines if the position is on a type identifier, and if so, skips it and returns it as a string. This function does _not_ allow +# modifiers. You must take care of those beforehand. +# +sub TryToGetType #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $name; + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + while ($tokens->[$index] =~ /^[a-z\@\.\_]/i) + { + if (exists $impossibleTypeWords{ lc($tokens->[$index]) } && $name !~ /\@$/) + { return undef; }; + + $name .= $tokens->[$index]; + $index++; + }; + + if (!defined $name) + { return undef; }; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if ($tokens->[$index] eq '?') + { + $name .= '?'; + $index++; + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + + if ($tokens->[$index] eq '<') + { + # XXX: This is half-assed. + $name .= '<'; + $index++; + + while ($index < scalar @$tokens && $tokens->[$index] ne '>') + { + $name .= $tokens->[$index]; + $index++; + } + + if ($index < scalar @$tokens) + { + $name .= '>'; + $index++; + } + else + { return undef; } + + $self->TryToSkipWhitespace(\$index, \$lineNumber); + } + + while ($tokens->[$index] eq '[') + { + $name .= '['; + $index++; + + while ($tokens->[$index] eq ',') + { + $name .= ','; + $index++; + }; + + if ($tokens->[$index] eq ']') + { + $name .= ']'; + $index++; + } + else + { return undef; } + }; + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return $name; + }; + + + +############################################################################### +# Group: Low Level Parsing Functions + + +# +# Function: GenericSkip +# +# Advances the position one place through general code. +# +# - If the position is on a string, it will skip it completely. +# - If the position is on an opening symbol, it will skip until the past the closing symbol. +# - If the position is on whitespace (including comments and preprocessing directives), it will skip it completely. +# - Otherwise it skips one token. +# +# Parameters: +# +# indexRef - A reference to the current index. +# lineNumberRef - A reference to the current line number. +# +sub GenericSkip #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway. + if ($tokens->[$$indexRef] eq '{') + { + $$indexRef++; + $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}'); + } + elsif ($tokens->[$$indexRef] eq '(') + { + $$indexRef++; + $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ')'); + } + elsif ($tokens->[$$indexRef] eq '[') + { + $$indexRef++; + $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ']'); + } + + elsif ($self->TryToSkipWhitespace($indexRef, $lineNumberRef) || + $self->TryToSkipString($indexRef, $lineNumberRef)) + { + } + + else + { $$indexRef++; }; + }; + + +# +# Function: GenericSkipUntilAfter +# +# Advances the position via <GenericSkip()> until a specific token is reached and passed. +# +sub GenericSkipUntilAfter #(indexRef, lineNumberRef, token) + { + my ($self, $indexRef, $lineNumberRef, $token) = @_; + my $tokens = $self->Tokens(); + + while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token) + { $self->GenericSkip($indexRef, $lineNumberRef); }; + + if ($tokens->[$$indexRef] eq "\n") + { $$lineNumberRef++; }; + $$indexRef++; + }; + + +# +# Function: SkipRestOfStatement +# +# Advances the position via <GenericSkip()> until after the end of the current statement, which is defined as a semicolon or +# a brace group. Of course, either of those appearing inside parenthesis, a nested brace group, etc. don't count. +# +sub SkipRestOfStatement #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + while ($$indexRef < scalar @$tokens && + $tokens->[$$indexRef] ne ';' && + $tokens->[$$indexRef] ne '{') + { + $self->GenericSkip($indexRef, $lineNumberRef); + }; + + if ($tokens->[$$indexRef] eq ';') + { $$indexRef++; } + elsif ($tokens->[$$indexRef] eq '{') + { $self->GenericSkip($indexRef, $lineNumberRef); }; + }; + + +# +# Function: TryToSkipString +# If the current position is on a string delimiter, skip past the string and return true. +# +# Parameters: +# +# indexRef - A reference to the index of the position to start at. +# lineNumberRef - A reference to the line number of the position. +# +# Returns: +# +# Whether the position was at a string. +# +# Syntax Support: +# +# - Supports quotes, apostrophes, and at-quotes. +# +sub TryToSkipString #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + # The three string delimiters. All three are Perl variables when preceded by a dollar sign. + if ($self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '\'') || + $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '"') ) + { + return 1; + } + elsif ($tokens->[$$indexRef] eq '@' && $tokens->[$$indexRef+1] eq '"') + { + $$indexRef += 2; + + # We need to do at-strings manually because backslash characters are accepted as regular characters, and two consecutive + # quotes are accepted as well. + + while ($$indexRef < scalar @$tokens && !($tokens->[$$indexRef] eq '"' && $tokens->[$$indexRef+1] ne '"') ) + { + if ($tokens->[$$indexRef] eq '"') + { + # This is safe because the while condition will only let through quote pairs. + $$indexRef += 2; + } + elsif ($tokens->[$$indexRef] eq "\n") + { + $$indexRef++; + $$lineNumberRef++; + } + else + { + $$indexRef++; + }; + }; + + # Skip the closing quote. + if ($$indexRef < scalar @$tokens) + { $$indexRef++; }; + + return 1; + } + else + { return undef; }; + }; + + +# +# Function: TryToSkipAttributes +# If the current position is on an attribute section, skip it and return true. Skips multiple attribute sections if they appear +# consecutively. +# +sub TryToSkipAttributes #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $success; + + while ($tokens->[$$indexRef] eq '[') + { + $success = 1; + $$indexRef++; + $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ']'); + $self->TryToSkipWhitespace($indexRef, $lineNumberRef); + }; + + return $success; + }; + + +# +# Function: TryToSkipWhitespace +# If the current position is on a whitespace token, a line break token, a comment, or a preprocessing directive, it skips them +# and returns true. If there are a number of these in a row, it skips them all. +# +sub TryToSkipWhitespace #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $result; + + while ($$indexRef < scalar @$tokens) + { + if ($tokens->[$$indexRef] =~ /^[ \t]/) + { + $$indexRef++; + $result = 1; + } + elsif ($tokens->[$$indexRef] eq "\n") + { + $$indexRef++; + $$lineNumberRef++; + $result = 1; + } + elsif ($self->TryToSkipComment($indexRef, $lineNumberRef) || + $self->TryToSkipPreprocessingDirective($indexRef, $lineNumberRef)) + { + $result = 1; + } + else + { last; }; + }; + + return $result; + }; + + +# +# Function: TryToSkipComment +# If the current position is on a comment, skip past it and return true. +# +sub TryToSkipComment #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + + return ( $self->TryToSkipLineComment($indexRef, $lineNumberRef) || + $self->TryToSkipMultilineComment($indexRef, $lineNumberRef) ); + }; + + +# +# Function: TryToSkipLineComment +# If the current position is on a line comment symbol, skip past it and return true. +# +sub TryToSkipLineComment #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + if ($tokens->[$$indexRef] eq '/' && $tokens->[$$indexRef+1] eq '/') + { + $self->SkipRestOfLine($indexRef, $lineNumberRef); + return 1; + } + else + { return undef; }; + }; + + +# +# Function: TryToSkipMultilineComment +# If the current position is on an opening comment symbol, skip past it and return true. +# +sub TryToSkipMultilineComment #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + if ($tokens->[$$indexRef] eq '/' && $tokens->[$$indexRef+1] eq '*') + { + $self->SkipUntilAfter($indexRef, $lineNumberRef, '*', '/'); + return 1; + } + else + { return undef; }; + }; + + +# +# Function: TryToSkipPreprocessingDirective +# If the current position is on a preprocessing directive, skip past it and return true. +# +sub TryToSkipPreprocessingDirective #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + if ($tokens->[$$indexRef] eq '#' && $self->IsFirstLineToken($$indexRef)) + { + $self->SkipRestOfLine($indexRef, $lineNumberRef); + return 1; + } + else + { return undef; }; + }; + + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/PLSQL.pm b/docs/tool/Modules/NaturalDocs/Languages/PLSQL.pm new file mode 100644 index 00000000..4c3df998 --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/PLSQL.pm @@ -0,0 +1,319 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::PLSQL +# +############################################################################### +# +# A subclass to handle the language variations of PL/SQL. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::PLSQL; + +use base 'NaturalDocs::Languages::Simple'; + + +# +# Function: OnPrototypeEnd +# +# Microsoft's SQL specifies parameters as shown below. +# +# > CREATE PROCEDURE Test @as int, @foo int AS ... +# +# Having a parameter @is or @as is perfectly valid even though those words are also used to end the prototype. We need to +# ignore text-based enders preceded by an at sign. Also note that it does not have parenthesis for parameter lists. We need to +# skip all commas if the prototype doesn't have parenthesis but does have @ characters. +# +# Identifiers such as function names may contain the characters $, #, and _, so if "as" or "is" appears directly after one of them +# we need to ignore the ender there as well. +# +# > FUNCTION Something_is_something ... +# +# Parameters: +# +# type - The <TopicType> of the prototype. +# prototypeRef - A reference to the prototype so far, minus the ender in dispute. +# ender - The ender symbol. +# +# Returns: +# +# ENDER_ACCEPT - The ender is accepted and the prototype is finished. +# ENDER_IGNORE - The ender is rejected and parsing should continue. Note that the prototype will be rejected as a whole +# if all enders are ignored before reaching the end of the code. +# ENDER_ACCEPT_AND_CONTINUE - The ender is accepted so the prototype may stand as is. However, the prototype might +# also continue on so continue parsing. If there is no accepted ender between here and +# the end of the code this version will be accepted instead. +# ENDER_REVERT_TO_ACCEPTED - The expedition from ENDER_ACCEPT_AND_CONTINUE failed. Use the last accepted +# version and end parsing. +# +sub OnPrototypeEnd #(type, prototypeRef, ender) + { + my ($self, $type, $prototypeRef, $ender) = @_; + + # _ should be handled already. + if ($ender =~ /^[a-z]+$/i && substr($$prototypeRef, -1) =~ /^[\@\$\#]$/) + { return ::ENDER_IGNORE(); } + + elsif ($type eq ::TOPIC_FUNCTION() && $ender eq ',') + { + if ($$prototypeRef =~ /^[^\(]*\@/) + { return ::ENDER_IGNORE(); } + else + { return ::ENDER_ACCEPT(); }; + } + + else + { return ::ENDER_ACCEPT(); }; + }; + + +# +# Function: ParsePrototype +# +# Overridden to handle Microsoft's parenthesisless version. Otherwise just throws to the parent. +# +# Parameters: +# +# type - The <TopicType>. +# prototype - The text prototype. +# +# Returns: +# +# A <NaturalDocs::Languages::Prototype> object. +# +sub ParsePrototype #(type, prototype) + { + my ($self, $type, $prototype) = @_; + + my $noParenthesisParameters = ($type eq ::TOPIC_FUNCTION() && $prototype =~ /^[^\(]*\@/); + + if ($prototype !~ /\(.*[^ ].*\)/ && !$noParenthesisParameters) + { return $self->SUPER::ParsePrototype($type, $prototype); }; + + + + my ($beforeParameters, $afterParameters, $isAfterParameters); + + if ($noParenthesisParameters) + { + ($beforeParameters, $prototype) = split(/\@/, $prototype, 2); + $prototype = '@' . $prototype; + }; + + my @tokens = $prototype =~ /([^\(\)\[\]\{\}\<\>\'\"\,]+|.)/g; + + my $parameter; + my @parameterLines; + + my @symbolStack; + + foreach my $token (@tokens) + { + if ($isAfterParameters) + { $afterParameters .= $token; } + + elsif ($symbolStack[-1] eq '\'' || $symbolStack[-1] eq '"') + { + if ($noParenthesisParameters || $symbolStack[0] eq '(') + { $parameter .= $token; } + else + { $beforeParameters .= $token; }; + + if ($token eq $symbolStack[-1]) + { pop @symbolStack; }; + } + + elsif ($token =~ /^[\(\[\{\<\'\"]$/) + { + if ($noParenthesisParameters || $symbolStack[0] eq '(') + { $parameter .= $token; } + else + { $beforeParameters .= $token; }; + + push @symbolStack, $token; + } + + elsif ( ($token eq ')' && $symbolStack[-1] eq '(') || + ($token eq ']' && $symbolStack[-1] eq '[') || + ($token eq '}' && $symbolStack[-1] eq '{') || + ($token eq '>' && $symbolStack[-1] eq '<') ) + { + if (!$noParenthesisParameters && $token eq ')' && scalar @symbolStack == 1 && $symbolStack[0] eq '(') + { + $afterParameters .= $token; + $isAfterParameters = 1; + } + else + { $parameter .= $token; }; + + pop @symbolStack; + } + + elsif ($token eq ',') + { + if (!scalar @symbolStack) + { + if ($noParenthesisParameters) + { + push @parameterLines, $parameter . $token; + $parameter = undef; + } + else + { + $beforeParameters .= $token; + }; + } + else + { + if (scalar @symbolStack == 1 && $symbolStack[0] eq '(' && !$noParenthesisParameters) + { + push @parameterLines, $parameter . $token; + $parameter = undef; + } + else + { + $parameter .= $token; + }; + }; + } + + else + { + if ($noParenthesisParameters || $symbolStack[0] eq '(') + { $parameter .= $token; } + else + { $beforeParameters .= $token; }; + }; + }; + + push @parameterLines, $parameter; + + foreach my $item (\$beforeParameters, \$afterParameters) + { + $$item =~ s/^ //; + $$item =~ s/ $//; + } + + my $prototypeObject = NaturalDocs::Languages::Prototype->New($beforeParameters, $afterParameters); + + + # Parse the actual parameters. + + foreach my $parameterLine (@parameterLines) + { + $prototypeObject->AddParameter( $self->ParseParameterLine($parameterLine) ); + }; + + return $prototypeObject; + }; + + +# +# Function: ParseParameterLine +# +# Parses a prototype parameter line and returns it as a <NaturalDocs::Languages::Prototype::Parameter> object. +# +sub ParseParameterLine #(line) + { + my ($self, $line) = @_; + + $line =~ s/^ //; + $line =~ s/ $//; + + my @tokens = $line =~ /([^\(\)\[\]\{\}\<\>\'\"\:\=\ ]+|\:\=|.)/g; + + my ($name, $type, $defaultValue, $defaultValuePrefix, $inType, $inDefaultValue); + + + my @symbolStack; + + foreach my $token (@tokens) + { + if ($inDefaultValue) + { $defaultValue .= $token; } + + elsif ($symbolStack[-1] eq '\'' || $symbolStack[-1] eq '"') + { + if ($inType) + { $type .= $token; } + else + { $name .= $token; }; + + if ($token eq $symbolStack[-1]) + { pop @symbolStack; }; + } + + elsif ($token =~ /^[\(\[\{\<\'\"]$/) + { + if ($inType) + { $type .= $token; } + else + { $name .= $token; }; + + push @symbolStack, $token; + } + + elsif ( ($token eq ')' && $symbolStack[-1] eq '(') || + ($token eq ']' && $symbolStack[-1] eq '[') || + ($token eq '}' && $symbolStack[-1] eq '{') || + ($token eq '>' && $symbolStack[-1] eq '<') ) + { + if ($inType) + { $type .= $token; } + else + { $name .= $token; }; + + pop @symbolStack; + } + + elsif ($token eq ' ') + { + if ($inType) + { $type .= $token; } + elsif (!scalar @symbolStack) + { $inType = 1; } + else + { $name .= $token; }; + } + + elsif ($token eq ':=' || $token eq '=') + { + if (!scalar @symbolStack) + { + $defaultValuePrefix = $token; + $inDefaultValue = 1; + } + elsif ($inType) + { $type .= $token; } + else + { $name .= $token; }; + } + + else + { + if ($inType) + { $type .= $token; } + else + { $name .= $token; }; + }; + }; + + foreach my $part (\$type, \$defaultValue) + { + $$part =~ s/ $//; + }; + + return NaturalDocs::Languages::Prototype::Parameter->New($type, undef, $name, undef, $defaultValue, $defaultValuePrefix); + }; + + +sub TypeBeforeParameter + { return 0; }; + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/Pascal.pm b/docs/tool/Modules/NaturalDocs/Languages/Pascal.pm new file mode 100644 index 00000000..e0242dec --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Pascal.pm @@ -0,0 +1,143 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Pascal +# +############################################################################### +# +# A subclass to handle the language variations of Pascal and Delphi. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::Pascal; + +use base 'NaturalDocs::Languages::Simple'; + + +# +# hash: prototypeDirectives +# +# An existence hash of all the directives that can appear after a function prototype and will be included. The keys are the all +# lowercase keywords. +# +my %prototypeDirectives = ( 'overload' => 1, + 'override' => 1, + 'virtual' => 1, + 'abstract' => 1, + 'reintroduce' => 1, + 'export' => 1, + 'public' => 1, + 'interrupt' => 1, + 'register' => 1, + 'pascal' => 1, + 'cdecl' => 1, + 'stdcall' => 1, + 'popstack' => 1, + 'saveregisters' => 1, + 'inline' => 1, + 'safecall' => 1 ); + +# +# hash: longPrototypeDirectives +# +# An existence hash of all the directives with parameters that can appear after a function prototype and will be included. The +# keys are the all lowercase keywords. +# +my %longPrototypeDirectives = ( 'alias' => 1, + 'external' => 1 ); + +# +# bool: checkingForDirectives +# +# Set after the first function semicolon, which means we're in directives mode. +# +my $checkingForDirectives; + + +# +# Function: OnCode +# +# Just overridden to reset <checkingForDirectives>. +# +sub OnCode #(...) + { + my ($self, @parameters) = @_; + + $checkingForDirectives = 0; + + return $self->SUPER::OnCode(@parameters); + }; + + +# +# Function: OnPrototypeEnd +# +# Pascal's syntax has directives after the prototype that should be included. +# +# > function MyFunction ( param1: type ); virtual; abstract; +# +# Parameters: +# +# type - The <TopicType> of the prototype. +# prototypeRef - A reference to the prototype so far, minus the ender in dispute. +# ender - The ender symbol. +# +# Returns: +# +# ENDER_ACCEPT - The ender is accepted and the prototype is finished. +# ENDER_IGNORE - The ender is rejected and parsing should continue. Note that the prototype will be rejected as a whole +# if all enders are ignored before reaching the end of the code. +# ENDER_ACCEPT_AND_CONTINUE - The ender is accepted so the prototype may stand as is. However, the prototype might +# also continue on so continue parsing. If there is no accepted ender between here and +# the end of the code this version will be accepted instead. +# ENDER_REVERT_TO_ACCEPTED - The expedition from ENDER_ACCEPT_AND_CONTINUE failed. Use the last accepted +# version and end parsing. +# +sub OnPrototypeEnd #(type, prototypeRef, ender) + { + my ($self, $type, $prototypeRef, $ender) = @_; + + if ($type eq ::TOPIC_FUNCTION() && $ender eq ';') + { + if (!$checkingForDirectives) + { + $checkingForDirectives = 1; + return ::ENDER_ACCEPT_AND_CONTINUE(); + } + elsif ($$prototypeRef =~ /;[ \t]*([a-z]+)([^;]*)$/i) + { + my ($lastDirective, $extra) = (lc($1), $2); + + if (exists $prototypeDirectives{$lastDirective} && $extra =~ /^[ \t]*$/) + { return ::ENDER_ACCEPT_AND_CONTINUE(); } + elsif (exists $longPrototypeDirectives{$lastDirective}) + { return ::ENDER_ACCEPT_AND_CONTINUE(); } + else + { return ::ENDER_REVERT_TO_ACCEPTED(); }; + } + else + { return ::ENDER_REVERT_TO_ACCEPTED(); }; + } + else + { return ::ENDER_ACCEPT(); }; + }; + + +sub ParseParameterLine #(...) + { + my ($self, @params) = @_; + return $self->SUPER::ParsePascalParameterLine(@params); + }; + +sub TypeBeforeParameter + { + return 0; + }; + + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/Perl.pm b/docs/tool/Modules/NaturalDocs/Languages/Perl.pm new file mode 100644 index 00000000..8817aadc --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Perl.pm @@ -0,0 +1,1370 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Perl +# +############################################################################### +# +# A subclass to handle the language variations of Perl. +# +# +# Topic: Language Support +# +# Supported: +# +# - Packages +# - Inheritance via "use base" and "@ISA =". +# - Functions +# - Variables +# +# Not supported yet: +# +# - Constants +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::Perl; + +use base 'NaturalDocs::Languages::Advanced'; + + +# +# array: hereDocTerminators +# An array of active Here Doc terminators, or an empty array if not active. Each entry is an arrayref of tokens. The entries +# must appear in the order they must appear in the source. +# +my @hereDocTerminators; + + + +############################################################################### +# Group: Interface Functions + + +# +# Function: PackageSeparator +# Returns the package separator symbol. +# +sub PackageSeparator + { return '::'; }; + +# +# Function: EnumValues +# Returns the <EnumValuesType> that describes how the language handles enums. +# +sub EnumValues + { return ::ENUM_GLOBAL(); }; + + +# +# Function: ParseFile +# +# Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>. +# +# Parameters: +# +# sourceFile - The name of the source file to parse. +# topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file. +# +# Returns: +# +# The array ( autoTopics, scopeRecord ). +# +# autoTopics - An arrayref of automatically generated topics from the file, or undef if none. +# scopeRecord - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChanges>, or undef if none. +# +sub ParseFile #(sourceFile, topicsList) + { + my ($self, $sourceFile, $topicsList) = @_; + + @hereDocTerminators = ( ); + + # The regular block comment symbols are undef because they're all potentially JavaDoc comments. PreprocessFile() will + # handle translating things like =begin naturaldocs and =begin javadoc to =begin nd. + $self->ParseForCommentsAndTokens($sourceFile, [ '#' ], undef, [ '##' ], [ '=begin nd', '=end nd' ]); + + my $tokens = $self->Tokens(); + my $index = 0; + my $lineNumber = 1; + + while ($index < scalar @$tokens) + { + if ($self->TryToSkipWhitespace(\$index, \$lineNumber) || + $self->TryToGetPackage(\$index, \$lineNumber) || + $self->TryToGetBase(\$index, \$lineNumber) || + $self->TryToGetFunction(\$index, \$lineNumber) || + $self->TryToGetVariable(\$index, \$lineNumber) ) + { + # The functions above will handle everything. + } + + elsif ($tokens->[$index] eq '{') + { + $self->StartScope('}', $lineNumber, undef); + $index++; + } + + elsif ($tokens->[$index] eq '}') + { + if ($self->ClosingScopeSymbol() eq '}') + { $self->EndScope($lineNumber); }; + + $index++; + } + + elsif (lc($tokens->[$index]) eq 'eval') + { + # We want to skip the token in this case instead of letting it fall to SkipRestOfStatement. This allows evals with braces + # to be treated like normal floating braces. + $index++; + } + + else + { + $self->SkipRestOfStatement(\$index, \$lineNumber); + }; + }; + + + # Don't need to keep these around. + $self->ClearTokens(); + + return ( $self->AutoTopics(), $self->ScopeRecord() ); + }; + + +# +# Function: PreprocessFile +# +# Overridden to support "=begin nd" and similar. +# +# - "=begin [nd|naturaldocs|natural docs|jd|javadoc|java doc]" all translate to "=begin nd". +# - "=[nd|naturaldocs|natural docs]" also translate to "=begin nd". +# - "=end [nd|naturaldocs|natural docs|jd|javadoc]" all translate to "=end nd". +# - "=cut" from a ND block translates into "=end nd", but the next line will be altered to begin with "(NDPODBREAK)". This is +# so if there is POD leading into ND which ends with a cut, the parser can still end the original POD because the end ND line +# would have been removed. Remember, <NaturalDocs::Languages::Advanced->ParseForCommentsAndTokens()> removes +# Natural Docs-worthy comments to save parsing time. +# - "=pod begin nd" and "=pod end nd" are supported for compatibility with ND 1.32 and earlier, even though the syntax is a +# mistake. +# - It also supports the wrong plural forms, so naturaldoc/natural doc/javadocs/java docs will work. +# +sub PreprocessFile #(lines) + { + my ($self, $lines) = @_; + + my $inNDPOD = 0; + my $mustBreakPOD = 0; + + for (my $i = 0; $i < scalar @$lines; $i++) + { + if ($lines->[$i] =~ /^\=(?:(?:pod[ \t]+)?begin[ \t]+)?(?:nd|natural[ \t]*docs?|jd|java[ \t]*docs?)[ \t]*$/i) + { + $lines->[$i] = '=begin nd'; + $inNDPOD = 1; + $mustBreakPOD = 0; + } + elsif ($lines->[$i] =~ /^\=(?:pod[ \t]+)end[ \t]+(?:nd|natural[ \t]*docs?|jd|javadocs?)[ \t]*$/i) + { + $lines->[$i] = '=end nd'; + $inNDPOD = 0; + $mustBreakPOD = 0; + } + elsif ($lines->[$i] =~ /^\=cut[ \t]*$/i) + { + if ($inNDPOD) + { + $lines->[$i] = '=end nd'; + $inNDPOD = 0; + $mustBreakPOD = 1; + }; + } + elsif ($mustBreakPOD) + { + $lines->[$i] = '(NDPODBREAK)' . $lines->[$i]; + $mustBreakPOD = 0; + }; + }; + }; + + + +############################################################################### +# Group: Statement Parsing Functions +# All functions here assume that the current position is at the beginning of a statement. +# +# Note for developers: I am well aware that the code in these functions do not check if we're past the end of the tokens as +# often as it should. We're making use of the fact that Perl will always return undef in these cases to keep the code simpler. + + +# +# Function: TryToGetPackage +# +# Determines whether the position is at a package declaration statement, and if so, generates a topic for it, skips it, and +# returns true. +# +sub TryToGetPackage #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + if (lc($tokens->[$$indexRef]) eq 'package') + { + my $index = $$indexRef + 1; + my $lineNumber = $$lineNumberRef; + + if (!$self->TryToSkipWhitespace(\$index, \$lineNumber)) + { return undef; }; + + my $name; + + while ($tokens->[$index] =~ /^[a-z_\:]/i) + { + $name .= $tokens->[$index]; + $index++; + }; + + if (!defined $name) + { return undef; }; + + my $autoTopic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_CLASS(), $name, + undef, undef, + undef, + undef, undef, $$lineNumberRef); + $self->AddAutoTopic($autoTopic); + + NaturalDocs::Parser->OnClass($autoTopic->Symbol()); + + $self->SetPackage($autoTopic->Symbol(), $$lineNumberRef); + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + $self->SkipRestOfStatement($indexRef, $lineNumberRef); + + return 1; + }; + + return undef; + }; + + +# +# Function: TryToGetBase +# +# Determines whether the position is at a package base declaration statement, and if so, calls +# <NaturalDocs::Parser->OnClassParent()>. +# +# Supported Syntaxes: +# +# > use base [list of strings] +# > @ISA = [list of strings] +# > @[package]::ISA = [list of strings] +# > our @ISA = [list of strings] +# +sub TryToGetBase #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my ($index, $lineNumber, $class, $parents); + + if (lc($tokens->[$$indexRef]) eq 'use') + { + $index = $$indexRef + 1; + $lineNumber = $$lineNumberRef; + + if (!$self->TryToSkipWhitespace(\$index, \$lineNumber) || + lc($tokens->[$index]) ne 'base') + { return undef; } + + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + $parents = $self->TryToGetListOfStrings(\$index, \$lineNumber); + } + + else + { + $index = $$indexRef; + $lineNumber = $$lineNumberRef; + + if (lc($tokens->[$index]) eq 'our') + { + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + }; + + if ($tokens->[$index] eq '@') + { + $index++; + + while ($index < scalar @$tokens) + { + if ($tokens->[$index] eq 'ISA') + { + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + if ($tokens->[$index] eq '=') + { + $index++; + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + $parents = $self->TryToGetListOfStrings(\$index, \$lineNumber); + } + else + { last; }; + } + + # If token isn't ISA... + elsif ($tokens->[$index] =~ /^[a-z0-9_:]/i) + { + $class .= $tokens->[$index]; + $index++; + } + else + { last; }; + }; + }; + }; + + if (defined $parents) + { + if (defined $class) + { + $class =~ s/::$//; + my @classIdentifiers = split(/::/, $class); + $class = NaturalDocs::SymbolString->Join(@classIdentifiers); + } + else + { $class = $self->CurrentScope(); }; + + foreach my $parent (@$parents) + { + my @parentIdentifiers = split(/::/, $parent); + my $parentSymbol = NaturalDocs::SymbolString->Join(@parentIdentifiers); + + NaturalDocs::Parser->OnClassParent($class, $parentSymbol, undef, undef, ::RESOLVE_ABSOLUTE()); + }; + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + $self->SkipRestOfStatement($indexRef, $lineNumberRef); + + return 1; + } + else + { return undef; }; + }; + + +# +# Function: TryToGetFunction +# +# Determines whether the position is at a function declaration statement, and if so, generates a topic for it, skips it, and +# returns true. +# +sub TryToGetFunction #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + if ( lc($tokens->[$$indexRef]) eq 'sub') + { + my $prototypeStart = $$indexRef; + my $prototypeStartLine = $$lineNumberRef; + my $prototypeEnd = $$indexRef + 1; + my $prototypeEndLine = $$lineNumberRef; + + if ( !$self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine) || + $tokens->[$prototypeEnd] !~ /^[a-z_]/i ) + { return undef; }; + + my $name = $tokens->[$prototypeEnd]; + $prototypeEnd++; + + # We parsed 'sub [name]'. Now keep going until we find a semicolon or a brace. + + for (;;) + { + if ($prototypeEnd >= scalar @$tokens) + { return undef; } + + # End if we find a semicolon, since it means we found a predeclaration rather than an actual function. + elsif ($tokens->[$prototypeEnd] eq ';') + { return undef; } + + elsif ($tokens->[$prototypeEnd] eq '{') + { + # Found it! + + my $prototype = $self->NormalizePrototype( $self->CreateString($prototypeStart, $prototypeEnd) ); + + $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FUNCTION(), $name, + $self->CurrentScope(), undef, + $prototype, + undef, undef, $prototypeStartLine)); + + $$indexRef = $prototypeEnd; + $$lineNumberRef = $prototypeEndLine; + + $self->SkipRestOfStatement($indexRef, $lineNumberRef); + + return 1; + } + + else + { $self->GenericSkip(\$prototypeEnd, \$prototypeEndLine, 0, 1); }; + }; + } + else + { return undef; }; + }; + + +# +# Function: TryToGetVariable +# +# Determines if the position is at a variable declaration statement, and if so, generates a topic for it, skips it, and returns +# true. +# +# Supported Syntaxes: +# +# - Supports variables declared with "my", "our", and "local". +# - Supports multiple declarations in one statement, such as "my ($x, $y);". +# - Supports types and attributes. +# +sub TryToGetVariable #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $firstToken = lc( $tokens->[$$indexRef] ); + + if ($firstToken eq 'my' || $firstToken eq 'our' || $firstToken eq 'local') + { + my $prototypeStart = $$indexRef; + my $prototypeStartLine = $$lineNumberRef; + my $prototypeEnd = $$indexRef + 1; + my $prototypeEndLine = $$lineNumberRef; + + $self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine); + + + # Get the type if present. + + my $type; + + if ($tokens->[$prototypeEnd] =~ /^[a-z\:]/i) + { + do + { + $type .= $tokens->[$prototypeEnd]; + $prototypeEnd++; + } + while ($tokens->[$prototypeEnd] =~ /^[a-z\:]/i); + + if (!$self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine)) + { return undef; }; + }; + + + # Get the name, or possibly names. + + if ($tokens->[$prototypeEnd] eq '(') + { + # If there's multiple variables, we'll need to build a custom prototype for each one. $firstToken already has the + # declaring word. We're going to store each name in @names, and we're going to use $prototypeStart and + # $prototypeEnd to capture any properties appearing after the list. + + my $name; + my @names; + my $hasComma = 0; + + $prototypeStart = $prototypeEnd + 1; + $prototypeStartLine = $prototypeEndLine; + + for (;;) + { + $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine); + + $name = $self->TryToGetVariableName(\$prototypeStart, \$prototypeStartLine); + + if (!defined $name) + { return undef; }; + + push @names, $name; + + $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine); + + # We can have multiple commas in a row. We can also have trailing commas. However, the parenthesis must + # not start with a comma or be empty, hence this logic does not appear earlier. + while ($tokens->[$prototypeStart] eq ',') + { + $prototypeStart++; + $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine); + + $hasComma = 1; + } + + if ($tokens->[$prototypeStart] eq ')') + { + $prototypeStart++; + last; + } + elsif (!$hasComma) + { return undef; }; + }; + + + # Now find the end of the prototype. + + $prototypeEnd = $prototypeStart; + $prototypeEndLine = $prototypeStartLine; + + while ($prototypeEnd < scalar @$tokens && + $tokens->[$prototypeEnd] !~ /^[\;\=]/) + { + $prototypeEnd++; + }; + + + my $prototypePrefix = $firstToken . ' '; + if (defined $type) + { $prototypePrefix .= $type . ' '; }; + + my $prototypeSuffix = ' ' . $self->CreateString($prototypeStart, $prototypeEnd); + + foreach $name (@names) + { + my $prototype = $self->NormalizePrototype( $prototypePrefix . $name . $prototypeSuffix ); + + $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_VARIABLE(), $name, + $self->CurrentScope(), undef, + $prototype, + undef, undef, $prototypeStartLine)); + }; + + $self->SkipRestOfStatement(\$prototypeEnd, \$prototypeEndLine); + + $$indexRef = $prototypeEnd; + $$lineNumberRef = $prototypeEndLine; + } + + else # no parenthesis + { + my $name = $self->TryToGetVariableName(\$prototypeEnd, \$prototypeEndLine); + + if (!defined $name) + { return undef; }; + + while ($prototypeEnd < scalar @$tokens && + $tokens->[$prototypeEnd] !~ /^[\;\=]/) + { + $prototypeEnd++; + }; + + my $prototype = $self->NormalizePrototype( $self->CreateString($prototypeStart, $prototypeEnd) ); + + $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_VARIABLE(), $name, + $self->CurrentScope(), undef, + $prototype, + undef, undef, $prototypeStartLine)); + + $self->SkipRestOfStatement(\$prototypeEnd, \$prototypeEndLine); + + $$indexRef = $prototypeEnd; + $$lineNumberRef = $prototypeEndLine; + }; + + return 1; + } + else + { return undef; }; + }; + + +# +# Function: TryToGetVariableName +# +# Determines if the position is at a variable name, and if so, skips it and returns the name. +# +sub TryToGetVariableName #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $name; + + if ($tokens->[$$indexRef] =~ /^[\$\@\%\*]/) + { + $name .= $tokens->[$$indexRef]; + $$indexRef++; + + $self->TryToSkipWhitespace($indexRef, $lineNumberRef); + + if ($tokens->[$$indexRef] =~ /^[a-z_]/i) + { + $name .= $tokens->[$$indexRef]; + $$indexRef++; + } + else + { return undef; }; + }; + + return $name; + }; + + +# +# Function: TryToGetListOfStrings +# +# Attempts to retrieve a list of strings from the current position. Returns an arrayref of them if any are found, or undef if none. +# It stops the moment it reaches a non-string, so "string1, variable, string2" will only return string1. +# +# Supported Syntaxes: +# +# - Supports parenthesis. +# - Supports all string forms supported by <TryToSkipString()>. +# - Supports qw() string arrays. +# +sub TryToGetListOfStrings #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $parenthesis = 0; + my $strings; + + while ($$indexRef < scalar @$tokens) + { + # We'll tolerate parenthesis. + if ($tokens->[$$indexRef] eq '(') + { + $$indexRef++; + $parenthesis++; + } + elsif ($tokens->[$$indexRef] eq ')') + { + if ($parenthesis == 0) + { last; }; + + $$indexRef++; + $parenthesis--; + } + elsif ($tokens->[$$indexRef] eq ',') + { + $$indexRef++; + } + else + { + my ($startContent, $endContent); + my $symbolIndex = $$indexRef; + + if ($self->TryToSkipString($indexRef, $lineNumberRef, \$startContent, \$endContent)) + { + my $content = $self->CreateString($startContent, $endContent); + + if (!defined $strings) + { $strings = [ ]; }; + + if (lc($tokens->[$symbolIndex]) eq 'qw') + { + $content =~ tr/ \t\n/ /s; + $content =~ s/^ //; + + my @qwStrings = split(/ /, $content); + + push @$strings, @qwStrings; + } + else + { + push @$strings, $content; + }; + } + else + { last; }; + }; + + $self->TryToSkipWhitespace($indexRef, $lineNumberRef); + }; + + return $strings; + }; + + +############################################################################### +# Group: Low Level Parsing Functions + + +# +# Function: GenericSkip +# +# Advances the position one place through general code. +# +# - If the position is on a comment or string, it will skip it completely. +# - If the position is on an opening symbol, it will skip until the past the closing symbol. +# - If the position is on a regexp or quote-like operator, it will skip it completely. +# - If the position is on a backslash, it will skip it and the following token. +# - If the position is on whitespace (including comments), it will skip it completely. +# - Otherwise it skips one token. +# +# Parameters: +# +# indexRef - A reference to the current index. +# lineNumberRef - A reference to the current line number. +# noRegExps - If set, does not test for regular expressions. +# +sub GenericSkip #(indexRef, lineNumberRef, noRegExps) + { + my ($self, $indexRef, $lineNumberRef, $noRegExps, $allowStringedClosingParens) = @_; + my $tokens = $self->Tokens(); + + if ($tokens->[$$indexRef] eq "\\" && $$indexRef + 1 < scalar @$tokens && $tokens->[$$indexRef+1] ne "\n") + { $$indexRef += 2; } + + # Note that we don't want to count backslashed ()[]{} since they could be in regexps. Also, ()[] are valid variable names + # when preceded by a string. + + # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway. + elsif ($tokens->[$$indexRef] eq '{' && !$self->IsBackslashed($$indexRef)) + { + $$indexRef++; + $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}', $noRegExps, $allowStringedClosingParens); + } + elsif ($tokens->[$$indexRef] eq '(' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef)) + { + # Temporarily allow stringed closing parenthesis if it looks like we're in an anonymous function declaration with Perl's + # cheap version of prototypes, such as "my $_declare = sub($) {}". + my $tempAllowStringedClosingParens = $allowStringedClosingParens; + if (!$allowStringedClosingParens) + { + my $tempIndex = $$indexRef - 1; + if ($tempIndex >= 0 && $tokens->[$tempIndex] =~ /^[ \t]/) + { $tempIndex--; } + if ($tempIndex >= 0 && $tokens->[$tempIndex] eq 'sub') + { $tempAllowStringedClosingParens = 1; } + } + + $$indexRef++; + + do + { $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ')', $noRegExps, $tempAllowStringedClosingParens); } + while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1) && !$tempAllowStringedClosingParens); + } + elsif ($tokens->[$$indexRef] eq '[' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef)) + { + $$indexRef++; + + do + { $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ']', $noRegExps, $allowStringedClosingParens); } + while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1)); + } + + elsif ($self->TryToSkipWhitespace($indexRef, $lineNumberRef) || + $self->TryToSkipString($indexRef, $lineNumberRef) || + $self->TryToSkipHereDocDeclaration($indexRef, $lineNumberRef) || + (!$noRegExps && $self->TryToSkipRegexp($indexRef, $lineNumberRef) ) ) + { + } + + else + { $$indexRef++; }; + }; + + +# +# Function: GenericSkipUntilAfter +# +# Advances the position via <GenericSkip()> until a specific token is reached and passed. +# +sub GenericSkipUntilAfter #(indexRef, lineNumberRef, token, noRegExps, allowStringedClosingParens) + { + my ($self, $indexRef, $lineNumberRef, $token, $noRegExps, $allowStringedClosingParens) = @_; + my $tokens = $self->Tokens(); + + while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token) + { $self->GenericSkip($indexRef, $lineNumberRef, $noRegExps, $allowStringedClosingParens); }; + + if ($tokens->[$$indexRef] eq "\n") + { $$lineNumberRef++; }; + $$indexRef++; + }; + + +# +# Function: GenericRegexpSkip +# +# Advances the position one place through regexp code. +# +# - If the position is on an opening symbol, it will skip until the past the closing symbol. +# - If the position is on a backslash, it will skip it and the following token. +# - If the position is on whitespace (not including comments), it will skip it completely. +# - Otherwise it skips one token. +# +# Also differs from <GenericSkip()> in that the parenthesis in $( and $) do count against the scope, where they wouldn't +# normally. +# +# Parameters: +# +# indexRef - A reference to the current index. +# lineNumberRef - A reference to the current line number. +# inBrackets - Whether we're in brackets or not. If true, we don't care about matching braces and parenthesis. +# +sub GenericRegexpSkip #(indexRef, lineNumberRef, inBrackets) + { + my ($self, $indexRef, $lineNumberRef, $inBrackets) = @_; + my $tokens = $self->Tokens(); + + if ($tokens->[$$indexRef] eq "\\" && $$indexRef + 1 < scalar @$tokens && $tokens->[$$indexRef+1] ne "\n") + { $$indexRef += 2; } + + # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway. + elsif ($tokens->[$$indexRef] eq '{' && !$self->IsBackslashed($$indexRef) && !$inBrackets) + { + $$indexRef++; + $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, '}'); + } + elsif ($tokens->[$$indexRef] eq '(' && !$self->IsBackslashed($$indexRef) && !$inBrackets) + { + $$indexRef++; + $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, ')'); + } + elsif ($tokens->[$$indexRef] eq '[' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef)) + { + $$indexRef++; + + do + { $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, ']'); } + while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1)); + } + + elsif ($tokens->[$$indexRef] eq "\n") + { + $$lineNumberRef++; + $$indexRef++; + } + + else + { $$indexRef++; }; + }; + + +# +# Function: GenericRegexpSkipUntilAfter +# +# Advances the position via <GenericRegexpSkip()> until a specific token is reached and passed. +# +sub GenericRegexpSkipUntilAfter #(indexRef, lineNumberRef, token) + { + my ($self, $indexRef, $lineNumberRef, $token) = @_; + my $tokens = $self->Tokens(); + + my $inBrackets = ( $token eq ']' ); + + while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token) + { $self->GenericRegexpSkip($indexRef, $lineNumberRef, $inBrackets); }; + + if ($tokens->[$$indexRef] eq "\n") + { $$lineNumberRef++; }; + $$indexRef++; + }; + + +# +# Function: SkipRestOfStatement +# +# Advances the position via <GenericSkip()> until after the end of the current statement, which is defined as a semicolon or +# a brace group. Of course, either of those appearing inside parenthesis, a nested brace group, etc. don't count. +# +sub SkipRestOfStatement #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + while ($$indexRef < scalar @$tokens && + $tokens->[$$indexRef] ne ';' && + !($tokens->[$$indexRef] eq '{' && !$self->IsStringed($$indexRef)) ) + { + $self->GenericSkip($indexRef, $lineNumberRef); + }; + + if ($tokens->[$$indexRef] eq ';') + { $$indexRef++; } + elsif ($tokens->[$$indexRef] eq '{') + { $self->GenericSkip($indexRef, $lineNumberRef); }; + }; + + +# +# Function: TryToSkipWhitespace +# +# If the current position is on whitespace it skips them and returns true. If there are a number of these in a row, it skips them +# all. +# +# Supported Syntax: +# +# - Whitespace +# - Line break +# - All comment forms supported by <TryToSkipComment()> +# - Here Doc content +# +sub TryToSkipWhitespace #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $result; + + while ($$indexRef < scalar @$tokens) + { + if ($self->TryToSkipHereDocContent($indexRef, $lineNumberRef) || + $self->TryToSkipComment($indexRef, $lineNumberRef)) + { + $result = 1; + } + elsif ($tokens->[$$indexRef] =~ /^[ \t]/) + { + $$indexRef++; + $result = 1; + } + elsif ($tokens->[$$indexRef] eq "\n") + { + $$indexRef++; + $$lineNumberRef++; + $result = 1; + } + else + { last; }; + }; + + return $result; + }; + + +# +# Function: TryToSkipComment +# If the current position is on a comment, skip past it and return true. +# +sub TryToSkipComment #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + + return ( $self->TryToSkipLineComment($indexRef, $lineNumberRef) || + $self->TryToSkipPODComment($indexRef, $lineNumberRef) ); + }; + + +# +# Function: TryToSkipLineComment +# If the current position is on a line comment symbol, skip past it and return true. +# +sub TryToSkipLineComment #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + # Note that $#var is not a comment. + if ($tokens->[$$indexRef] eq '#' && !$self->IsStringed($$indexRef)) + { + $self->SkipRestOfLine($indexRef, $lineNumberRef); + return 1; + } + else + { return undef; }; + }; + + +# +# Function: TryToSkipPODComment +# If the current position is on a POD comment symbol, skip past it and return true. +# +sub TryToSkipPODComment #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + # Note that whitespace is not allowed before the equals sign. It must directly start a line. + if ($tokens->[$$indexRef] eq '=' && + ( $$indexRef == 0 || $tokens->[$$indexRef - 1] eq "\n" ) && + $tokens->[$$indexRef + 1] =~ /^[a-z]/i ) + { + # Skip until =cut or (NDPODBREAK). Note that it's theoretically possible for =cut to appear without a prior POD directive. + + do + { + if ($tokens->[$$indexRef] eq '=' && lc( $tokens->[$$indexRef + 1] ) eq 'cut') + { + $self->SkipRestOfLine($indexRef, $lineNumberRef); + last; + } + elsif ($tokens->[$$indexRef] eq '(' && $$indexRef + 2 < scalar @$tokens && + $tokens->[$$indexRef+1] eq 'NDPODBREAK' && $tokens->[$$indexRef+2] eq ')') + { + $$indexRef += 3; + last; + } + else + { + $self->SkipRestOfLine($indexRef, $lineNumberRef); + }; + } + while ($$indexRef < scalar @$tokens); + + return 1; + } + + # It's also possible that (NDPODBREAK) will appear without any opening pod statement because "=begin nd" and "=cut" will + # still result in one. We need to pick off the stray (NDPODBREAK). + elsif ($tokens->[$$indexRef] eq '(' && $$indexRef + 2 < scalar @$tokens && + $tokens->[$$indexRef+1] eq 'NDPODBREAK' && $tokens->[$$indexRef+2] eq ')') + { + $$indexRef += 3; + return 1; + } + + else + { return undef; }; + }; + + +# +# Function: TryToSkipString +# If the current position is on a string delimiter, skip past the string and return true. +# +# Parameters: +# +# indexRef - A reference to the index of the position to start at. +# lineNumberRef - A reference to the line number of the position. +# startContentIndexRef - A reference to the variable in which to store the index of the first content token. May be undef. +# endContentIndexRef - A reference to the variable in which to store the index of the end of the content, which is one past +# the last content token. may be undef. +# +# Returns: +# +# Whether the position was at a string. The index, line number, and content index variabls will only be changed if true. +# +# Syntax Support: +# +# - Supports quotes, apostrophes, backticks, q(), qq(), qx(), and qw(). +# - All symbols are supported for the letter forms. +# +sub TryToSkipString #(indexRef, lineNumberRef, startContentIndexRef, endContentIndexRef) + { + my ($self, $indexRef, $lineNumberRef, $startContentIndexRef, $endContentIndexRef) = @_; + my $tokens = $self->Tokens(); + + # The three string delimiters. All three are Perl variables when preceded by a dollar sign. + if (!$self->IsStringed($$indexRef) && + ( $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '\'', '\'', $startContentIndexRef, $endContentIndexRef) || + $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '"', '"', $startContentIndexRef, $endContentIndexRef) || + $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '`', '`', $startContentIndexRef, $endContentIndexRef) ) ) + { + return 1; + } + elsif ($tokens->[$$indexRef] =~ /^(?:q|qq|qx|qw)$/i && + ($$indexRef == 0 || $tokens->[$$indexRef - 1] !~ /^[\$\%\@\*]$/)) + { + $$indexRef++; + + $self->TryToSkipWhitespace($indexRef, $lineNumberRef); + + my $openingSymbol = $tokens->[$$indexRef]; + my $closingSymbol; + + if ($openingSymbol eq '{') + { $closingSymbol = '}'; } + elsif ($openingSymbol eq '(') + { $closingSymbol = ')'; } + elsif ($openingSymbol eq '[') + { $closingSymbol = ']'; } + elsif ($openingSymbol eq '<') + { $closingSymbol = '>'; } + else + { $closingSymbol = $openingSymbol; }; + + $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, $openingSymbol, $closingSymbol, + $startContentIndexRef, $endContentIndexRef); + + return 1; + } + else + { return undef; }; + }; + + +# +# Function: TryToSkipHereDocDeclaration +# +# If the current position is on a Here Doc declaration, add its terminators to <hereDocTerminators> and skip it. +# +# Syntax Support: +# +# - Supports <<EOF +# - Supports << "String" with all string forms supported by <TryToSkipString()>. +# +sub TryToSkipHereDocDeclaration #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + if ($tokens->[$index] eq '<' && $tokens->[$index + 1] eq '<') + { + $index += 2; + my $success; + + # No whitespace allowed with the bare word. + if ($tokens->[$index] =~ /^[a-z0-9_]/i) + { + push @hereDocTerminators, [ $tokens->[$index] ]; + $index++; + $success = 1; + } + else + { + $self->TryToSkipWhitespace(\$index, \$lineNumber); + + my ($contentStart, $contentEnd); + if ($self->TryToSkipString(\$index, \$lineNumber, \$contentStart, \$contentEnd)) + { + push @hereDocTerminators, [ @{$tokens}[$contentStart..$contentEnd - 1] ]; + $success = 1; + }; + }; + + if ($success) + { + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + return 1; + }; + }; + + return 0; + }; + + +# +# Function: TryToSkipHereDocContent +# +# If the current position is at the beginning of a line and there are entries in <hereDocTerminators>, skips lines until all the +# terminators are exhausted or we reach the end of the file. +# +# Returns: +# +# Whether the position was on Here Doc content. +# +sub TryToSkipHereDocContent #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + # We don't use IsFirstLineToken() because it really needs to be the first line token. Whitespace is not allowed. + if ($$indexRef > 0 && $tokens->[$$indexRef - 1] eq "\n") + { + my $success = (scalar @hereDocTerminators > 0); + + while (scalar @hereDocTerminators && $$indexRef < scalar @$tokens) + { + my $terminatorIndex = 0; + + while ($hereDocTerminators[0]->[$terminatorIndex] eq $tokens->[$$indexRef]) + { + $terminatorIndex++; + $$indexRef++; + }; + + if ($terminatorIndex == scalar @{$hereDocTerminators[0]} && + ($tokens->[$$indexRef] eq "\n" || ($tokens->[$$indexRef] =~ /^[ \t]/ && $tokens->[$$indexRef + 1] eq "\n")) ) + { + shift @hereDocTerminators; + $$indexRef++; + $$lineNumberRef++; + } + else + { $self->SkipRestOfLine($indexRef, $lineNumberRef); }; + }; + + return $success; + } + + else + { return 0; }; + }; + + +# +# Function: TryToSkipRegexp +# If the current position is on a regular expression or a quote-like operator, skip past it and return true. +# +# Syntax Support: +# +# - Supports //, ??, m//, qr//, s///, tr///, and y///. +# - All symbols are supported for the letter forms. +# - ?? is *not* supported because it could cause problems with ?: statements. The generic parser has a good chance of +# successfully stumbling through a regex, whereas the regex code will almost certainly see the rest of the file as part of it. +# +sub TryToSkipRegexp #(indexRef, lineNumberRef) + { + my ($self, $indexRef, $lineNumberRef) = @_; + my $tokens = $self->Tokens(); + + my $isRegexp; + + # If it's a supported character sequence that's not a variable (ex $qr)... + if ($tokens->[$$indexRef] =~ /^(?:m|qr|s|tr|y)$/i && + ($$indexRef == 0 || $tokens->[$$indexRef - 1] !~ /^[\$\%\@\*\-]$/) ) + { $isRegexp = 1; } + + elsif ($tokens->[$$indexRef] eq '/' && !$self->IsStringed($$indexRef)) + { + # This is a bit of a hack. If we find a random slash, it could be a divide operator or a bare regexp. Find the first previous + # non-whitespace token and if it's text, a closing brace, or a string, assume it's a divide operator. (Strings don't make + # much pratical sense there but a regexp would be impossible.) Otherwise assume it's a regexp. + + # We make a special consideration for split() appearing without parenthesis. If the previous token is split and it's not a + # variable, assume it is a regexp even though it fails the above test. + + my $index = $$indexRef - 1; + + while ($index >= 0 && $tokens->[$index] =~ /^(?: |\t|\n)/) + { $index--; }; + + if ($index < 0 || $tokens->[$index] !~ /^[a-zA-Z0-9_\)\]\}\'\"\`]/ || + ($tokens->[$index] =~ /^split|grep$/ && $index > 0 && $tokens->[$index-1] !~ /^[\$\%\@\*]$/) ) + { $isRegexp = 1; }; + }; + + if ($isRegexp) + { + my $operator = lc($tokens->[$$indexRef]); + my $index = $$indexRef; + my $lineNumber = $$lineNumberRef; + + if ($operator =~ /^[\?\/]/) + { $operator = 'm'; } + else + { + $index++; + + # Believe it or not, s#...# is allowed. We can't pass over number signs here. + if ($tokens->[$index] ne '#') + { $self->TryToSkipWhitespace(\$index, \$lineNumber); }; + }; + + if ($tokens->[$index] =~ /^\w/) + { return undef; }; + if ($tokens->[$index] eq '=' && $tokens->[$index+1] eq '>') + { return undef; }; + + my $openingSymbol = $tokens->[$index]; + my $closingSymbol; + + if ($openingSymbol eq '{') + { $closingSymbol = '}'; } + elsif ($openingSymbol eq '(') + { $closingSymbol = ')'; } + elsif ($openingSymbol eq '[') + { $closingSymbol = ']'; } + elsif ($openingSymbol eq '<') + { $closingSymbol = '>'; } + else + { $closingSymbol = $openingSymbol; }; + + $index++; + + $self->GenericRegexpSkipUntilAfter(\$index, \$lineNumber, $closingSymbol); + + $$indexRef = $index; + $$lineNumberRef = $lineNumber; + + if ($operator =~ /^(?:s|tr|y)$/) + { + if ($openingSymbol ne $closingSymbol) + { + $self->TryToSkipWhitespace($indexRef, $lineNumberRef); + + $openingSymbol = $tokens->[$index]; + + if ($openingSymbol eq '{') + { $closingSymbol = '}'; } + elsif ($openingSymbol eq '(') + { $closingSymbol = ')'; } + elsif ($openingSymbol eq '[') + { $closingSymbol = ']'; } + elsif ($openingSymbol eq '<') + { $closingSymbol = '>'; } + else + { $closingSymbol = $openingSymbol; }; + + $$indexRef++; + }; + + if ($operator eq 's') + { + $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, $closingSymbol, 1); + } + else # ($operator eq 'tr' || $operator eq 'y') + { + while ($$indexRef < scalar @$tokens && + ($tokens->[$$indexRef] ne $closingSymbol || $self->IsBackslashed($$indexRef)) ) + { + if ($tokens->[$$indexRef] eq "\n") + { $$lineNumberRef++; }; + $$indexRef++; + }; + + $$indexRef++; + }; + }; + + # We want to skip any letters after the regexp. Otherwise something like tr/a/b/s; could have the trailing s; interpreted + # as another regexp. Whitespace is not allowed between the closing symbol and the letters. + + if ($tokens->[$$indexRef] =~ /^[a-z]/i) + { $$indexRef++; }; + + return 1; + }; + + return undef; + }; + + + +############################################################################### +# Group: Support Functions + + +# +# Function: IsStringed +# +# Returns whether the position is after a string (dollar sign) character. Returns false if it's preceded by two dollar signs so +# "if ($x == $$)" doesn't skip the closing parenthesis as stringed. +# +# Parameters: +# +# index - The index of the postition. +# +sub IsStringed #(index) + { + my ($self, $index) = @_; + my $tokens = $self->Tokens(); + + if ($index > 0 && $tokens->[$index - 1] eq '$' && !($index > 1 && $tokens->[$index - 2] eq '$')) + { return 1; } + else + { return undef; }; + }; + + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/Prototype.pm b/docs/tool/Modules/NaturalDocs/Languages/Prototype.pm new file mode 100644 index 00000000..3a038513 --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Prototype.pm @@ -0,0 +1,92 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Prototype +# +############################################################################### +# +# A data class for storing parsed prototypes. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +use NaturalDocs::Languages::Prototype::Parameter; + + +package NaturalDocs::Languages::Prototype; + +use NaturalDocs::DefineMembers 'BEFORE_PARAMETERS', 'BeforeParameters()', 'SetBeforeParameters()', + 'AFTER_PARAMETERS', 'AfterParameters()', 'SetAfterParameters()', + 'PARAMETERS', 'Parameters()'; +# Dependency: New(), constant order, no parents. + + +# +# Function: New +# +# Creates and returns a new prototype object. +# +# Parameters: +# +# beforeParameters - The part of the prototype before the parameter list. +# afterParameters - The part of the prototype after the parameter list. +# +# You cannot set the parameters from here. Use <AddParameter()>. +# +sub New #(beforeParameters, afterParameters) + { + my ($package, @params) = @_; + + # Dependency: Constant order, no parents. + + my $object = [ @params ]; + bless $object, $package; + + return $object; + }; + + +# +# Functions: Members +# +# BeforeParameters - Returns the part of the prototype before the parameter list. If there is no parameter list, this will be the +# only thing that returns content. +# SetBeforeParameters - Replaces the part of the prototype before the parameter list. +# AfterParameters - Returns the part of the prototype after the parameter list, if any. +# SetAfterParameters - Replaces the part of the prototype after the parameter list. +# Parameters - Returns the parameter list as an arrayref of <NaturalDocs::Languages::Prototype::Parameters>, or undef if none. +# + +# +# Function: AddParameter +# +# Adds a <NaturalDocs::Languages::Prototype::Parameter> to the list. +# +sub AddParameter #(parameter) + { + my ($self, $parameter) = @_; + + if (!defined $self->[PARAMETERS]) + { $self->[PARAMETERS] = [ ]; }; + + push @{$self->[PARAMETERS]}, $parameter; + }; + + +# +# Function: OnlyBeforeParameters +# +# Returns whether <BeforeParameters()> is the only thing set. +# +sub OnlyBeforeParameters + { + my $self = shift; + return (!defined $self->[PARAMETERS] && !defined $self->[AFTER_PARAMETERS]); + }; + + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/Prototype/Parameter.pm b/docs/tool/Modules/NaturalDocs/Languages/Prototype/Parameter.pm new file mode 100644 index 00000000..2d8f6bec --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Prototype/Parameter.pm @@ -0,0 +1,87 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Prototype::Parameter +# +############################################################################### +# +# A data class for storing parsed prototype parameters. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::Prototype::Parameter; + +use NaturalDocs::DefineMembers 'TYPE', 'Type()', 'SetType()', + 'TYPE_PREFIX', 'TypePrefix()', 'SetTypePrefix()', + 'NAME', 'Name()', 'SetName()', + 'NAME_PREFIX', 'NamePrefix()', 'SetNamePrefix()', + 'DEFAULT_VALUE', 'DefaultValue()', 'SetDefaultValue()', + 'DEFAULT_VALUE_PREFIX', 'DefaultValuePrefix()', 'SetDefaultValuePrefix()'; +# Dependency: New() depends on the order of these constants and that they don't inherit from another class. + + +# +# Constants: Members +# +# The object is implemented as a blessed arrayref, with the following constants as its indexes. +# +# TYPE - The parameter type, if any. +# TYPE_PREFIX - The parameter type prefix which should be aligned separately, if any. +# NAME - The parameter name. +# NAME_PREFIX - The parameter name prefix which should be aligned separately, if any. +# DEFAULT_VALUE - The default value expression, if any. +# DEFAULT_VALUE_PREFIX - The default value prefix which should be aligned separately, if any. +# + +# +# Function: New +# +# Creates and returns a new prototype object. +# +# Parameters: +# +# type - The parameter type, if any. +# typePrefix - The parameter type prefix which should be aligned separately, if any. +# name - The parameter name. +# namePrefix - The parameter name prefix which should be aligned separately, if any. +# defaultValue - The default value expression, if any. +# defaultValuePrefix - The default value prefix which should be aligned separately, if any. +# +sub New #(type, typePrefix, name, namePrefix, defaultValue, defaultValuePrefix) + { + my ($package, @params) = @_; + + # Dependency: This depends on the order of the parameters being the same as the order of the constants, and that the + # constants don't inherit from another class. + + my $object = [ @params ]; + bless $object, $package; + + return $object; + }; + + +# +# Functions: Members +# +# Type - The parameter type, if any. +# SetType - Replaces the parameter type. +# TypePrefix - The parameter type prefix, which should be aligned separately, if any. +# SetTypePrefix - Replaces the parameter type prefix. +# Name - The parameter name. +# SetName - Replaces the parameter name. +# NamePrefix - The parameter name prefix, which should be aligned separately, if any. +# SetNamePrefix - Replaces the parameter name prefix. +# DefaultValue - The default value expression, if any. +# SetDefaultValue - Replaces the default value expression. +# DefaultValuePrefix - The default value prefix, which should be aligned separately, if any. +# SetDefaultValuePrefix - Replaces the default value prefix. +# + + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/Simple.pm b/docs/tool/Modules/NaturalDocs/Languages/Simple.pm new file mode 100644 index 00000000..9d962b1c --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Simple.pm @@ -0,0 +1,503 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Simple +# +############################################################################### +# +# A class containing the characteristics of a particular programming language for basic support within Natural Docs. +# Also serves as a base class for languages that break from general conventions, such as not having parameter lists use +# parenthesis and commas. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::Simple; + +use base 'NaturalDocs::Languages::Base'; +use base 'Exporter'; + +our @EXPORT = ( 'ENDER_ACCEPT', 'ENDER_IGNORE', 'ENDER_ACCEPT_AND_CONTINUE', 'ENDER_REVERT_TO_ACCEPTED' ); + + +use NaturalDocs::DefineMembers 'LINE_COMMENT_SYMBOLS', 'LineCommentSymbols()', 'SetLineCommentSymbols() duparrayref', + 'BLOCK_COMMENT_SYMBOLS', 'BlockCommentSymbols()', + 'SetBlockCommentSymbols() duparrayref', + 'PROTOTYPE_ENDERS', + 'LINE_EXTENDER', 'LineExtender()', 'SetLineExtender()', + 'PACKAGE_SEPARATOR', 'PackageSeparator()', + 'PACKAGE_SEPARATOR_WAS_SET', 'PackageSeparatorWasSet()', + 'ENUM_VALUES', 'EnumValues()', + 'ENUM_VALUES_WAS_SET', 'EnumValuesWasSet()'; + +# +# Function: New +# +# Creates and returns a new object. +# +# Parameters: +# +# name - The name of the language. +# +sub New #(name) + { + my ($selfPackage, $name) = @_; + + my $object = $selfPackage->SUPER::New($name); + + $object->[ENUM_VALUES] = ::ENUM_GLOBAL(); + $object->[PACKAGE_SEPARATOR] = '.'; + + return $object; + }; + + +# +# Functions: Members +# +# LineCommentSymbols - Returns an arrayref of symbols that start a line comment, or undef if none. +# SetLineCommentSymbols - Replaces the arrayref of symbols that start a line comment. +# BlockCommentSymbols - Returns an arrayref of start/end symbol pairs that specify a block comment, or undef if none. Pairs +# are specified with two consecutive array entries. +# SetBlockCommentSymbols - Replaces the arrayref of start/end symbol pairs that specify a block comment. Pairs are +# specified with two consecutive array entries. +# LineExtender - Returns the symbol to ignore a line break in languages where line breaks are significant. +# SetLineExtender - Replaces the symbol to ignore a line break in languages where line breaks are significant. +# PackageSeparator - Returns the package separator symbol. +# PackageSeparatorWasSet - Returns whether the package separator symbol was ever changed from the default. +# + +# +# Function: SetPackageSeparator +# Replaces the language's package separator string. +# +sub SetPackageSeparator #(separator) + { + my ($self, $separator) = @_; + $self->[PACKAGE_SEPARATOR] = $separator; + $self->[PACKAGE_SEPARATOR_WAS_SET] = 1; + }; + + +# +# Functions: Members +# +# EnumValues - Returns the <EnumValuesType> that describes how the language handles enums. +# EnumValuesWasSet - Returns whether <EnumValues> was ever changed from the default. + + +# +# Function: SetEnumValues +# Replaces the <EnumValuesType> that describes how the language handles enums. +# +sub SetEnumValues #(EnumValuesType newBehavior) + { + my ($self, $behavior) = @_; + $self->[ENUM_VALUES] = $behavior; + $self->[ENUM_VALUES_WAS_SET] = 1; + }; + + +# +# Function: PrototypeEndersFor +# +# Returns an arrayref of prototype ender symbols for the passed <TopicType>, or undef if none. +# +sub PrototypeEndersFor #(type) + { + my ($self, $type) = @_; + + if (defined $self->[PROTOTYPE_ENDERS]) + { return $self->[PROTOTYPE_ENDERS]->{$type}; } + else + { return undef; }; + }; + + +# +# Function: SetPrototypeEndersFor +# +# Replaces the arrayref of prototype ender symbols for the passed <TopicType>. +# +sub SetPrototypeEndersFor #(type, enders) + { + my ($self, $type, $enders) = @_; + + if (!defined $self->[PROTOTYPE_ENDERS]) + { $self->[PROTOTYPE_ENDERS] = { }; }; + + if (!defined $enders) + { delete $self->[PROTOTYPE_ENDERS]->{$type}; } + else + { + $self->[PROTOTYPE_ENDERS]->{$type} = [ @$enders ]; + }; + }; + + + + +############################################################################### +# Group: Parsing Functions + + +# +# Function: ParseFile +# +# Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()> +# and all other sections to <OnCode()>. +# +# Parameters: +# +# sourceFile - The <FileName> of the source file to parse. +# topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file. +# +# Returns: +# +# Since this class cannot automatically document the code or generate a scope record, it always returns ( undef, undef ). +# +sub ParseFile #(sourceFile, topicsList) + { + my ($self, $sourceFile, $topicsList) = @_; + + open(SOURCEFILEHANDLE, '<' . $sourceFile) + or die "Couldn't open input file " . $sourceFile . "\n"; + + my @commentLines; + my @codeLines; + my $lastCommentTopicCount = 0; + + if ($self->Name() eq 'Text File') + { + my $line = <SOURCEFILEHANDLE>; + + # On the very first line, remove a Unicode BOM if present. Information on it available at: + # http://www.unicode.org/faq/utf_bom.html#BOM + $line =~ s/^\xEF\xBB\xBF//; + + while ($line) + { + ::XChomp(\$line); + push @commentLines, $line; + $line = <SOURCEFILEHANDLE>; + }; + + NaturalDocs::Parser->OnComment(\@commentLines, 1); + } + + else + { + my $line = <SOURCEFILEHANDLE>; + my $lineNumber = 1; + + # On the very first line, remove a Unicode BOM if present. Information on it available at: + # http://www.unicode.org/faq/utf_bom.html#BOM + $line =~ s/^\xEF\xBB\xBF//; + + while (defined $line) + { + ::XChomp(\$line); + my $originalLine = $line; + + + # Retrieve single line comments. This leaves $line at the next line. + + if ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols())) + { + do + { + push @commentLines, $line; + $line = <SOURCEFILEHANDLE>; + + if (!defined $line) + { goto EndDo; }; + + ::XChomp(\$line); + } + while ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols())); + + EndDo: # I hate Perl sometimes. + } + + + # Retrieve multiline comments. This leaves $line at the next line. + + elsif (my $closingSymbol = $self->StripOpeningBlockSymbols(\$line, $self->BlockCommentSymbols())) + { + # Note that it is possible for a multiline comment to start correctly but not end so. We want those comments to stay in + # the code. For example, look at this prototype with this splint annotation: + # + # int get_array(integer_t id, + # /*@out@*/ array_t array); + # + # The annotation starts correctly but doesn't end so because it is followed by code on the same line. + + my $lineRemainder; + + for (;;) + { + $lineRemainder = $self->StripClosingSymbol(\$line, $closingSymbol); + + push @commentLines, $line; + + # If we found an end comment symbol... + if (defined $lineRemainder) + { last; }; + + $line = <SOURCEFILEHANDLE>; + + if (!defined $line) + { last; }; + + ::XChomp(\$line); + }; + + if ($lineRemainder !~ /^[ \t]*$/) + { + # If there was something past the closing symbol this wasn't an acceptable comment, so move the lines to code. + push @codeLines, @commentLines; + @commentLines = ( ); + }; + + $line = <SOURCEFILEHANDLE>; + } + + + # Otherwise just add it to the code. + + else + { + push @codeLines, $line; + $line = <SOURCEFILEHANDLE>; + }; + + + # If there were comments, send them to Parser->OnComment(). + + if (scalar @commentLines) + { + # First process any code lines before the comment. + if (scalar @codeLines) + { + $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount); + $lineNumber += scalar @codeLines; + @codeLines = ( ); + }; + + $lastCommentTopicCount = NaturalDocs::Parser->OnComment(\@commentLines, $lineNumber); + $lineNumber += scalar @commentLines; + @commentLines = ( ); + }; + + }; # while (defined $line) + + + # Clean up any remaining code. + if (scalar @codeLines) + { + $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount); + @codeLines = ( ); + }; + + }; + + close(SOURCEFILEHANDLE); + + return ( undef, undef ); + }; + + +# +# Function: OnCode +# +# Called whenever a section of code is encountered by the parser. Is used to find the prototype of the last topic created. +# +# Parameters: +# +# codeLines - The source code as an arrayref of lines. +# codeLineNumber - The line number of the first line of code. +# topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file. +# lastCommentTopicCount - The number of Natural Docs topics that were created by the last comment. +# +sub OnCode #(codeLines, codeLineNumber, topicList, lastCommentTopicCount) + { + my ($self, $codeLines, $codeLineNumber, $topicList, $lastCommentTopicCount) = @_; + + if ($lastCommentTopicCount && defined $self->PrototypeEndersFor($topicList->[-1]->Type())) + { + my $lineIndex = 0; + my $prototype; + + # Skip all blank lines before a prototype. + while ($lineIndex < scalar @$codeLines && $codeLines->[$lineIndex] =~ /^[ \t]*$/) + { $lineIndex++; }; + + my @tokens; + my $tokenIndex = 0; + + my @brackets; + my $enders = $self->PrototypeEndersFor($topicList->[-1]->Type()); + + # Add prototype lines until we reach the end of the prototype or the end of the code lines. + while ($lineIndex < scalar @$codeLines) + { + my $line = $self->RemoveLineExtender($codeLines->[$lineIndex] . "\n"); + + push @tokens, $line =~ /([^\(\)\[\]\{\}\<\>]+|.)/g; + + while ($tokenIndex < scalar @tokens) + { + # If we're not inside brackets, check for ender symbols. + if (!scalar @brackets) + { + my $startingIndex = 0; + my $testPrototype; + + for (;;) + { + my ($enderIndex, $ender) = ::FindFirstSymbol($tokens[$tokenIndex], $enders, $startingIndex); + + if ($enderIndex == -1) + { last; } + else + { + # We do this here so we don't duplicate prototype for every single token. Just the first time an ender symbol + # is found in one. + if (!defined $testPrototype) + { $testPrototype = $prototype; }; + + $testPrototype .= substr($tokens[$tokenIndex], $startingIndex, $enderIndex - $startingIndex); + + my $enderResult; + + # If the ender is all text and the character preceding or following it is as well, ignore it. + if ($ender =~ /^[a-z0-9]+$/i && + ( ($enderIndex > 0 && substr($tokens[$tokenIndex], $enderIndex - 1, 1) =~ /^[a-z0-9_]$/i) || + substr($tokens[$tokenIndex], $enderIndex + length($ender), 1) =~ /^[a-z0-9_]$/i ) ) + { $enderResult = ENDER_IGNORE(); } + else + { $enderResult = $self->OnPrototypeEnd($topicList->[-1]->Type(), \$testPrototype, $ender); } + + if ($enderResult == ENDER_IGNORE()) + { + $testPrototype .= $ender; + $startingIndex = $enderIndex + length($ender); + } + elsif ($enderResult == ENDER_REVERT_TO_ACCEPTED()) + { + return; + } + else # ENDER_ACCEPT || ENDER_ACCEPT_AND_CONTINUE + { + my $titleInPrototype = $topicList->[-1]->Title(); + + # Strip parenthesis so Function(2) and Function(int, int) will still match Function(anything). + $titleInPrototype =~ s/[\t ]*\([^\(]*$//; + + if (index($testPrototype, $titleInPrototype) != -1) + { + $topicList->[-1]->SetPrototype( $self->NormalizePrototype($testPrototype) ); + }; + + if ($enderResult == ENDER_ACCEPT()) + { return; } + else # ENDER_ACCEPT_AND_CONTINUE + { + $testPrototype .= $ender; + $startingIndex = $enderIndex + length($ender); + }; + }; + }; + }; + } + + # If we are inside brackets, check for closing symbols. + elsif ( ($tokens[$tokenIndex] eq ')' && $brackets[-1] eq '(') || + ($tokens[$tokenIndex] eq ']' && $brackets[-1] eq '[') || + ($tokens[$tokenIndex] eq '}' && $brackets[-1] eq '{') || + ($tokens[$tokenIndex] eq '>' && $brackets[-1] eq '<') ) + { + pop @brackets; + }; + + # Check for opening brackets. + if ($tokens[$tokenIndex] =~ /^[\(\[\{\<]$/) + { + push @brackets, $tokens[$tokenIndex]; + }; + + $prototype .= $tokens[$tokenIndex]; + $tokenIndex++; + }; + + $lineIndex++; + }; + + # If we got out of that while loop by running out of lines, there was no prototype. + }; + }; + + +use constant ENDER_ACCEPT => 1; +use constant ENDER_IGNORE => 2; +use constant ENDER_ACCEPT_AND_CONTINUE => 3; +use constant ENDER_REVERT_TO_ACCEPTED => 4; + +# +# Function: OnPrototypeEnd +# +# Called whenever the end of a prototype is found so that there's a chance for derived classes to mark false positives. +# +# Parameters: +# +# type - The <TopicType> of the prototype. +# prototypeRef - A reference to the prototype so far, minus the ender in dispute. +# ender - The ender symbol. +# +# Returns: +# +# ENDER_ACCEPT - The ender is accepted and the prototype is finished. +# ENDER_IGNORE - The ender is rejected and parsing should continue. Note that the prototype will be rejected as a whole +# if all enders are ignored before reaching the end of the code. +# ENDER_ACCEPT_AND_CONTINUE - The ender is accepted so the prototype may stand as is. However, the prototype might +# also continue on so continue parsing. If there is no accepted ender between here and +# the end of the code this version will be accepted instead. +# ENDER_REVERT_TO_ACCEPTED - The expedition from ENDER_ACCEPT_AND_CONTINUE failed. Use the last accepted +# version and end parsing. +# +sub OnPrototypeEnd #(type, prototypeRef, ender) + { + return ENDER_ACCEPT(); + }; + + +# +# Function: RemoveLineExtender +# +# If the passed line has a line extender, returns it without the extender or the line break that follows. If it doesn't, or there are +# no line extenders defined, returns the passed line unchanged. +# +sub RemoveLineExtender #(line) + { + my ($self, $line) = @_; + + if (defined $self->LineExtender()) + { + my $lineExtenderIndex = rindex($line, $self->LineExtender()); + + if ($lineExtenderIndex != -1 && + substr($line, $lineExtenderIndex + length($self->LineExtender())) =~ /^[ \t]*\n$/) + { + $line = substr($line, 0, $lineExtenderIndex) . ' '; + }; + }; + + return $line; + }; + + +1; diff --git a/docs/tool/Modules/NaturalDocs/Languages/Tcl.pm b/docs/tool/Modules/NaturalDocs/Languages/Tcl.pm new file mode 100644 index 00000000..bd6b5a0d --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Tcl.pm @@ -0,0 +1,219 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Tcl +# +############################################################################### +# +# A subclass to handle the language variations of Tcl. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::Tcl; + +use base 'NaturalDocs::Languages::Simple'; + + +# +# bool: pastFirstBrace +# +# Whether we've past the first brace in a function prototype or not. +# +my $pastFirstBrace; + + +# +# Function: OnCode +# +# This is just overridden to reset <pastFirstBrace>. +# +sub OnCode #(...) + { + my ($self, @params) = @_; + + $pastFirstBrace = 0; + + return $self->SUPER::OnCode(@params); + }; + + +# +# Function: OnPrototypeEnd +# +# Tcl's function syntax is shown below. +# +# > proc [name] { [params] } { [code] } +# +# The opening brace is one of the prototype enders. We need to allow the first opening brace because it contains the +# parameters. +# +# Also, the parameters may have braces within them. I've seen one that used { seconds 20 } as a parameter. +# +# Parameters: +# +# type - The <TopicType> of the prototype. +# prototypeRef - A reference to the prototype so far, minus the ender in dispute. +# ender - The ender symbol. +# +# Returns: +# +# ENDER_ACCEPT - The ender is accepted and the prototype is finished. +# ENDER_IGNORE - The ender is rejected and parsing should continue. Note that the prototype will be rejected as a whole +# if all enders are ignored before reaching the end of the code. +# ENDER_ACCEPT_AND_CONTINUE - The ender is accepted so the prototype may stand as is. However, the prototype might +# also continue on so continue parsing. If there is no accepted ender between here and +# the end of the code this version will be accepted instead. +# ENDER_REVERT_TO_ACCEPTED - The expedition from ENDER_ACCEPT_AND_CONTINUE failed. Use the last accepted +# version and end parsing. +# +sub OnPrototypeEnd #(type, prototypeRef, ender) + { + my ($self, $type, $prototypeRef, $ender) = @_; + + if ($type eq ::TOPIC_FUNCTION() && $ender eq '{' && !$pastFirstBrace) + { + $pastFirstBrace = 1; + return ::ENDER_IGNORE(); + } + else + { return ::ENDER_ACCEPT(); }; + }; + + +# +# Function: ParsePrototype +# +# Parses the prototype and returns it as a <NaturalDocs::Languages::Prototype> object. +# +# Parameters: +# +# type - The <TopicType>. +# prototype - The text prototype. +# +# Returns: +# +# A <NaturalDocs::Languages::Prototype> object. +# +sub ParsePrototype #(type, prototype) + { + my ($self, $type, $prototype) = @_; + + if ($type ne ::TOPIC_FUNCTION()) + { + my $object = NaturalDocs::Languages::Prototype->New($prototype); + return $object; + }; + + + # Parse the parameters out of the prototype. + + my @tokens = $prototype =~ /([^\{\}\ ]+|.)/g; + + my $parameter; + my @parameterLines; + + my $braceLevel = 0; + + my ($beforeParameters, $afterParameters, $finishedParameters); + + foreach my $token (@tokens) + { + if ($finishedParameters) + { $afterParameters .= $token; } + + elsif ($token eq '{') + { + if ($braceLevel == 0) + { $beforeParameters .= $token; } + + else # braceLevel > 0 + { $parameter .= $token; }; + + $braceLevel++; + } + + elsif ($token eq '}') + { + if ($braceLevel == 1) + { + if ($parameter && $parameter ne ' ') + { push @parameterLines, $parameter; }; + + $finishedParameters = 1; + $afterParameters .= $token; + + $braceLevel--; + } + elsif ($braceLevel > 1) + { + $parameter .= $token; + $braceLevel--; + }; + } + + elsif ($token eq ' ') + { + if ($braceLevel == 1) + { + if ($parameter) + { push @parameterLines, $parameter; }; + + $parameter = undef; + } + elsif ($braceLevel > 1) + { + $parameter .= $token; + } + else + { + $beforeParameters .= $token; + }; + } + + else + { + if ($braceLevel > 0) + { $parameter .= $token; } + else + { $beforeParameters .= $token; }; + }; + }; + + foreach my $part (\$beforeParameters, \$afterParameters) + { + $$part =~ s/^ //; + $$part =~ s/ $//; + }; + + my $prototypeObject = NaturalDocs::Languages::Prototype->New($beforeParameters, $afterParameters); + + + # Parse the actual parameters. + + foreach my $parameterLine (@parameterLines) + { + $prototypeObject->AddParameter( $self->ParseParameterLine($parameterLine) ); + }; + + return $prototypeObject; + }; + + +# +# Function: ParseParameterLine +# +# Parses a prototype parameter line and returns it as a <NaturalDocs::Languages::Prototype::Parameter> object. +# +sub ParseParameterLine #(line) + { + my ($self, $line) = @_; + return NaturalDocs::Languages::Prototype::Parameter->New(undef, undef, $line, undef, undef, undef); + }; + + +1; |