diff options
Diffstat (limited to 'docs/tool/Modules/NaturalDocs/Languages/Base.pm')
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Base.pm | 832 |
1 files changed, 832 insertions, 0 deletions
diff --git a/docs/tool/Modules/NaturalDocs/Languages/Base.pm b/docs/tool/Modules/NaturalDocs/Languages/Base.pm new file mode 100644 index 00000000..f89b7045 --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Base.pm @@ -0,0 +1,832 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Base +# +############################################################################### +# +# A base class for all programming language parsers. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::Base; + +use NaturalDocs::DefineMembers 'NAME', 'Name()', + 'EXTENSIONS', 'Extensions()', 'SetExtensions() duparrayref', + 'SHEBANG_STRINGS', 'ShebangStrings()', 'SetShebangStrings() duparrayref', + 'IGNORED_PREFIXES', + 'ENUM_VALUES'; + +use base 'Exporter'; +our @EXPORT = ('ENUM_GLOBAL', 'ENUM_UNDER_TYPE', 'ENUM_UNDER_PARENT'); + + +# +# Constants: EnumValuesType +# +# How enum values are handled in the language. +# +# ENUM_GLOBAL - Values are always global and thus 'value'. +# ENUM_UNDER_TYPE - Values are under the type in the hierarchy, and thus 'package.enum.value'. +# ENUM_UNDER_PARENT - Values are under the parent in the hierarchy, putting them on the same level as the enum itself. Thus +# 'package.value'. +# +use constant ENUM_GLOBAL => 1; +use constant ENUM_UNDER_TYPE => 2; +use constant ENUM_UNDER_PARENT => 3; + + +# +# Handle: SOURCEFILEHANDLE +# +# The handle of the source file currently being parsed. +# + + +# +# Function: New +# +# Creates and returns a new object. +# +# Parameters: +# +# name - The name of the language. +# +sub New #(name) + { + my ($selfPackage, $name) = @_; + + my $object = [ ]; + + $object->[NAME] = $name; + + bless $object, $selfPackage; + return $object; + }; + + +# +# Functions: Members +# +# Name - Returns the language's name. +# Extensions - Returns an arrayref of the language's file extensions, or undef if none. +# SetExtensions - Replaces the arrayref of the language's file extensions. +# ShebangStrings - Returns an arrayref of the language's shebang strings, or undef if none. +# SetShebangStrings - Replaces the arrayref of the language's shebang strings. +# + +# +# Function: PackageSeparator +# Returns the language's package separator string. +# +sub PackageSeparator + { return '.'; }; + +# +# Function: PackageSeparatorWasSet +# Returns whether the language's package separator string was ever changed from the default. +# +sub PackageSeparatorWasSet + { return 0; }; + + +# +# Function: EnumValues +# Returns the <EnumValuesType> that describes how the language handles enums. +# +sub EnumValues + { return ENUM_GLOBAL; }; + + +# +# Function: IgnoredPrefixesFor +# +# Returns an arrayref of ignored prefixes for the passed <TopicType>, or undef if none. The array is sorted so that the longest +# prefixes are first. +# +sub IgnoredPrefixesFor #(type) + { + my ($self, $type) = @_; + + if (defined $self->[IGNORED_PREFIXES]) + { return $self->[IGNORED_PREFIXES]->{$type}; } + else + { return undef; }; + }; + + +# +# Function: SetIgnoredPrefixesFor +# +# Replaces the arrayref of ignored prefixes for the passed <TopicType>. +# +sub SetIgnoredPrefixesFor #(type, prefixes) + { + my ($self, $type, $prefixesRef) = @_; + + if (!defined $self->[IGNORED_PREFIXES]) + { $self->[IGNORED_PREFIXES] = { }; }; + + if (!defined $prefixesRef) + { delete $self->[IGNORED_PREFIXES]->{$type}; } + else + { + my $prefixes = [ @$prefixesRef ]; + + # Sort prefixes to be longest to shortest. + @$prefixes = sort { length $b <=> length $a } @$prefixes; + + $self->[IGNORED_PREFIXES]->{$type} = $prefixes; + }; + }; + + +# +# Function: HasIgnoredPrefixes +# +# Returns whether the language has any ignored prefixes at all. +# +sub HasIgnoredPrefixes + { return defined $_[0]->[IGNORED_PREFIXES]; }; + + +# +# Function: CopyIgnoredPrefixesOf +# +# Copies all the ignored prefix settings of the passed <NaturalDocs::Languages::Base> object. +# +sub CopyIgnoredPrefixesOf #(language) + { + my ($self, $language) = @_; + + if ($language->HasIgnoredPrefixes()) + { + $self->[IGNORED_PREFIXES] = { }; + + while (my ($topicType, $prefixes) = each %{$language->[IGNORED_PREFIXES]}) + { + $self->[IGNORED_PREFIXES]->{$topicType} = [ @$prefixes ]; + }; + }; + }; + + + +############################################################################### +# Group: Parsing Functions + + +# +# Function: ParseFile +# +# Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>. +# This *must* be defined by a subclass. +# +# Parameters: +# +# sourceFile - The <FileName> of the source file to parse. +# topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file. +# +# Returns: +# +# The array ( autoTopics, scopeRecord ). +# +# autoTopics - An arrayref of automatically generated <NaturalDocs::Parser::ParsedTopics> from the file, or undef if none. +# scopeRecord - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChanges>, or undef if none. +# + + +# +# Function: ParsePrototype +# +# Parses the prototype and returns it as a <NaturalDocs::Languages::Prototype> object. +# +# Parameters: +# +# type - The <TopicType>. +# prototype - The text prototype. +# +# Returns: +# +# A <NaturalDocs::Languages::Prototype> object. +# +sub ParsePrototype #(type, prototype) + { + my ($self, $type, $prototype) = @_; + + my $isClass = NaturalDocs::Topics->TypeInfo($type)->ClassHierarchy(); + + if ($prototype !~ /\(.*[^ ].*\)/ && (!$isClass || $prototype !~ /\{.*[^ ].*\}/)) + { + my $object = NaturalDocs::Languages::Prototype->New($prototype); + return $object; + }; + + + # Parse the parameters out of the prototype. + + my @tokens = $prototype =~ /([^\(\)\[\]\{\}\<\>\'\"\,\;]+|.)/g; + + my $parameter; + my @parameterLines; + + my @symbolStack; + my $finishedParameters; + + my ($beforeParameters, $afterParameters); + + foreach my $token (@tokens) + { + if ($finishedParameters) + { $afterParameters .= $token; } + + elsif ($symbolStack[-1] eq '\'' || $symbolStack[-1] eq '"') + { + if ($symbolStack[0] eq '(' || ($isClass && $symbolStack[0] eq '{')) + { $parameter .= $token; } + else + { $beforeParameters .= $token; }; + + if ($token eq $symbolStack[-1]) + { pop @symbolStack; }; + } + + elsif ($token =~ /^[\(\[\{\<\'\"]$/) + { + if ($symbolStack[0] eq '(' || ($isClass && $symbolStack[0] eq '{')) + { $parameter .= $token; } + else + { $beforeParameters .= $token; }; + + push @symbolStack, $token; + } + + elsif ( ($token eq ')' && $symbolStack[-1] eq '(') || + ($token eq ']' && $symbolStack[-1] eq '[') || + ($token eq '}' && $symbolStack[-1] eq '{') || + ($token eq '>' && $symbolStack[-1] eq '<') ) + { + if ($symbolStack[0] eq '(') + { + if ($token eq ')' && scalar @symbolStack == 1) + { + if ($parameter ne ' ') + { push @parameterLines, $parameter; }; + + $finishedParameters = 1; + $afterParameters .= $token; + } + else + { $parameter .= $token; }; + } + elsif ($isClass && $symbolStack[0] eq '{') + { + if ($token eq '}' && scalar @symbolStack == 1) + { + if ($parameter ne ' ') + { push @parameterLines, $parameter; }; + + $finishedParameters = 1; + $afterParameters .= $token; + } + else + { $parameter .= $token; }; + } + else + { + $beforeParameters .= $token; + }; + + pop @symbolStack; + } + + elsif ($token eq ',' || $token eq ';') + { + if ($symbolStack[0] eq '(' || ($isClass && $symbolStack[0] eq '{')) + { + if (scalar @symbolStack == 1) + { + push @parameterLines, $parameter . $token; + $parameter = undef; + } + else + { + $parameter .= $token; + }; + } + else + { + $beforeParameters .= $token; + }; + } + + else + { + if ($symbolStack[0] eq '(' || ($isClass && $symbolStack[0] eq '{')) + { $parameter .= $token; } + else + { $beforeParameters .= $token; }; + }; + }; + + foreach my $part (\$beforeParameters, \$afterParameters) + { + $$part =~ s/^ //; + $$part =~ s/ $//; + }; + + my $prototypeObject = NaturalDocs::Languages::Prototype->New($beforeParameters, $afterParameters); + + + # Parse the actual parameters. + + foreach my $parameterLine (@parameterLines) + { + $prototypeObject->AddParameter( $self->ParseParameterLine($parameterLine) ); + }; + + return $prototypeObject; + }; + + +# +# Function: ParseParameterLine +# +# Parses a prototype parameter line and returns it as a <NaturalDocs::Languages::Prototype::Parameter> object. +# +# This vesion assumes a C++ style line. If you need a Pascal style line, override this function to forward to +# <ParsePascalParameterLine()>. +# +# > Function(parameter, type parameter, type parameter = value); +# +sub ParseParameterLine #(line) + { + my ($self, $line) = @_; + + $line =~ s/^ //; + $line =~ s/ $//; + + my @tokens = $line =~ /([^ \(\)\{\}\[\]\<\>\'\"\=]+|.)/g; + + my @symbolStack; + my @parameterWords = ( undef ); + my ($defaultValue, $defaultValuePrefix, $inDefaultValue); + + foreach my $token (@tokens) + { + if ($inDefaultValue) + { $defaultValue .= $token; } + + elsif ($symbolStack[-1] eq '\'' || $symbolStack[-1] eq '"') + { + $parameterWords[-1] .= $token; + + if ($token eq $symbolStack[-1]) + { pop @symbolStack; }; + } + + elsif ($token =~ /^[\(\[\{\<\'\"]$/) + { + push @symbolStack, $token; + $parameterWords[-1] .= $token; + } + + elsif ( ($token eq ')' && $symbolStack[-1] eq '(') || + ($token eq ']' && $symbolStack[-1] eq '[') || + ($token eq '}' && $symbolStack[-1] eq '{') || + ($token eq '>' && $symbolStack[-1] eq '<') ) + { + pop @symbolStack; + $parameterWords[-1] .= $token; + } + + elsif ($token eq ' ') + { + if (!scalar @symbolStack) + { push @parameterWords, undef; } + else + { $parameterWords[-1] .= $token; }; + } + + elsif ($token eq '=') + { + if (!scalar @symbolStack) + { + $defaultValuePrefix = $token; + $inDefaultValue = 1; + } + else + { $parameterWords[-1] .= $token; }; + } + + else + { + $parameterWords[-1] .= $token; + }; + }; + + my ($name, $namePrefix, $type, $typePrefix); + + if (!$parameterWords[-1]) + { pop @parameterWords; }; + + $name = pop @parameterWords; + + if ($parameterWords[-1]=~ /([\*\&]+)$/) + { + $namePrefix = $1; + $parameterWords[-1] = substr($parameterWords[-1], 0, 0 - length($namePrefix)); + $parameterWords[-1] =~ s/ $//; + + if (!$parameterWords[-1]) + { pop @parameterWords; }; + } + elsif ($name =~ /^([\*\&]+)/) + { + $namePrefix = $1; + $name = substr($name, length($namePrefix)); + $name =~ s/^ //; + }; + + $type = pop @parameterWords; + $typePrefix = join(' ', @parameterWords); + + if ($typePrefix) + { $typePrefix .= ' '; }; + + if ($type =~ /^([a-z0-9_\:\.]+(?:\.|\:\:))[a-z0-9_]/i) + { + my $attachedTypePrefix = $1; + + $typePrefix .= $attachedTypePrefix; + $type = substr($type, length($attachedTypePrefix)); + }; + + $defaultValue =~ s/ $//; + + return NaturalDocs::Languages::Prototype::Parameter->New($type, $typePrefix, $name, $namePrefix, + $defaultValue, $defaultValuePrefix); + }; + + +# +# Function: ParsePascalParameterLine +# +# Parses a Pascal-like prototype parameter line and returns it as a <NaturalDocs::Languages::Prototype::Parameter> object. +# Pascal lines are as follows: +# +# > Function (name: type; name, name: type := value) +# +# Also supports ActionScript lines +# +# > Function (name: type, name, name: type = value) +# +sub ParsePascalParameterLine #(line) + { + my ($self, $line) = @_; + + $line =~ s/^ //; + $line =~ s/ $//; + + my @tokens = $line =~ /([^\(\)\{\}\[\]\<\>\'\"\=\:]+|\:\=|.)/g; + my ($type, $name, $defaultValue, $defaultValuePrefix, $afterName, $afterDefaultValue); + my @symbolStack; + + foreach my $token (@tokens) + { + if ($afterDefaultValue) + { $defaultValue .= $token; } + + elsif ($symbolStack[-1] eq '\'' || $symbolStack[-1] eq '"') + { + if ($afterName) + { $type .= $token; } + else + { $name .= $token; }; + + if ($token eq $symbolStack[-1]) + { pop @symbolStack; }; + } + + elsif ($token =~ /^[\(\[\{\<\'\"]$/) + { + push @symbolStack, $token; + + if ($afterName) + { $type .= $token; } + else + { $name .= $token; }; + } + + elsif ( ($token eq ')' && $symbolStack[-1] eq '(') || + ($token eq ']' && $symbolStack[-1] eq '[') || + ($token eq '}' && $symbolStack[-1] eq '{') || + ($token eq '>' && $symbolStack[-1] eq '<') ) + { + pop @symbolStack; + + if ($afterName) + { $type .= $token; } + else + { $name .= $token; }; + } + + elsif ($afterName) + { + if (($token eq ':=' || $token eq '=') && !scalar @symbolStack) + { + $defaultValuePrefix = $token; + $afterDefaultValue = 1; + } + else + { $type .= $token; }; + } + + elsif ($token eq ':' && !scalar @symbolStack) + { + $name .= $token; + $afterName = 1; + } + + else + { $name .= $token; }; + }; + + foreach my $part (\$type, \$name, \$defaultValue) + { + $$part =~ s/^ //; + $$part =~ s/ $//; + }; + + return NaturalDocs::Languages::Prototype::Parameter->New($type, undef, $name, undef, $defaultValue, $defaultValuePrefix); + }; + + +# +# Function: TypeBeforeParameter +# +# Returns whether the type appears before the parameter in prototypes. +# +# For example, it does in C++ +# > void Function (int a, int b) +# +# but does not in Pascal +# > function Function (a: int; b, c: int) +# +sub TypeBeforeParameter + { + return 1; + }; + + + +# +# Function: IgnoredPrefixLength +# +# Returns the length of the prefix that should be ignored in the index, or zero if none. +# +# Parameters: +# +# name - The name of the symbol. +# type - The symbol's <TopicType>. +# +# Returns: +# +# The length of the prefix to ignore, or zero if none. +# +sub IgnoredPrefixLength #(name, type) + { + my ($self, $name, $type) = @_; + + foreach my $prefixes ($self->IgnoredPrefixesFor($type), $self->IgnoredPrefixesFor(::TOPIC_GENERAL())) + { + if (defined $prefixes) + { + foreach my $prefix (@$prefixes) + { + if (substr($name, 0, length($prefix)) eq $prefix) + { return length($prefix); }; + }; + }; + }; + + return 0; + }; + + + +############################################################################### +# Group: Support Functions + + +# +# Function: StripOpeningSymbols +# +# Determines if the line starts with any of the passed symbols, and if so, replaces it with spaces. This only happens +# if the only thing before it on the line is whitespace. +# +# Parameters: +# +# lineRef - A reference to the line to check. +# symbols - An arrayref of the symbols to check for. +# +# Returns: +# +# If the line starts with any of the passed comment symbols, it will replace it in the line with spaces and return the symbol. +# If the line doesn't, it will leave the line alone and return undef. +# +sub StripOpeningSymbols #(lineRef, symbols) + { + my ($self, $lineRef, $symbols) = @_; + + if (!defined $symbols) + { return undef; }; + + my ($index, $symbol) = ::FindFirstSymbol($$lineRef, $symbols); + + if ($index != -1 && substr($$lineRef, 0, $index) =~ /^[ \t]*$/) + { + return substr($$lineRef, $index, length($symbol), ' ' x length($symbol)); + }; + + return undef; + }; + + +# +# Function: StripOpeningJavaDocSymbols +# +# Determines if the line starts with any of the passed symbols, and if so, replaces it with spaces. This only happens +# if the only thing before it on the line is whitespace and the next character after it is whitespace or the end of the line. +# +# Parameters: +# +# lineRef - A reference to the line to check. +# symbols - An arrayref of the symbols to check for. +# +# Returns: +# +# If the line starts with any of the passed comment symbols, it will replace it in the line with spaces and return the symbol. +# If the line doesn't, it will leave the line alone and return undef. +# +sub StripOpeningJavaDocSymbols #(lineRef, symbols) + { + my ($self, $lineRef, $symbols) = @_; + + if (!defined $symbols) + { return undef; }; + + my ($index, $symbol) = ::FindFirstSymbol($$lineRef, $symbols); + + if ($index != -1 && substr($$lineRef, 0, $index) =~ /^[ \t]*$/ && substr($$lineRef, $index + length($symbol), 1) =~ /^[ \t]?$/) + { + return substr($$lineRef, $index, length($symbol), ' ' x length($symbol)); + }; + + return undef; + }; + + +# +# Function: StripOpeningBlockSymbols +# +# Determines if the line starts with any of the opening symbols in the passed symbol pairs, and if so, replaces it with spaces. +# This only happens if the only thing before it on the line is whitespace. +# +# Parameters: +# +# lineRef - A reference to the line to check. +# symbolPairs - An arrayref of the symbol pairs to check for. Pairs are specified as two consecutive array entries, with the +# opening symbol first. +# +# Returns: +# +# If the line starts with any of the opening symbols, it will replace it in the line with spaces and return the closing symbol. +# If the line doesn't, it will leave the line alone and return undef. +# +sub StripOpeningBlockSymbols #(lineRef, symbolPairs) + { + my ($self, $lineRef, $symbolPairs) = @_; + + if (!defined $symbolPairs) + { return undef; }; + + for (my $i = 0; $i < scalar @$symbolPairs; $i += 2) + { + my $index = index($$lineRef, $symbolPairs->[$i]); + + if ($index != -1 && substr($$lineRef, 0, $index) =~ /^[ \t]*$/) + { + substr($$lineRef, $index, length($symbolPairs->[$i]), ' ' x length($symbolPairs->[$i])); + return $symbolPairs->[$i + 1]; + }; + }; + + return undef; + }; + + +# +# Function: StripOpeningJavaDocBlockSymbols +# +# Determines if the line starts with any of the opening symbols in the passed symbol pairs, and if so, replaces it with spaces. +# This only happens if the only thing before it on the line is whitespace and the next character is whitespace or the end of the line. +# +# Parameters: +# +# lineRef - A reference to the line to check. +# symbolPairs - An arrayref of the symbol pairs to check for. Pairs are specified as two consecutive array entries, with the +# opening symbol first. +# +# Returns: +# +# If the line starts with any of the opening symbols, it will replace it in the line with spaces and return the closing symbol. +# If the line doesn't, it will leave the line alone and return undef. +# +sub StripOpeningJavaDocBlockSymbols #(lineRef, symbolPairs) + { + my ($self, $lineRef, $symbolPairs) = @_; + + if (!defined $symbolPairs) + { return undef; }; + + for (my $i = 0; $i < scalar @$symbolPairs; $i += 2) + { + my $index = index($$lineRef, $symbolPairs->[$i]); + + if ($index != -1 && substr($$lineRef, 0, $index) =~ /^[ \t]*$/ && + substr($$lineRef, $index + length($symbolPairs->[$i]), 1) =~ /^[ \t]?$/) + { + substr($$lineRef, $index, length($symbolPairs->[$i]), ' ' x length($symbolPairs->[$i])); + return $symbolPairs->[$i + 1]; + }; + }; + + return undef; + }; + + +# +# Function: StripClosingSymbol +# +# Determines if the line contains a symbol, and if so, truncates it just before the symbol. +# +# Parameters: +# +# lineRef - A reference to the line to check. +# symbol - The symbol to check for. +# +# Returns: +# +# The remainder of the line, or undef if the symbol was not found. +# +sub StripClosingSymbol #(lineRef, symbol) + { + my ($self, $lineRef, $symbol) = @_; + + my $index = index($$lineRef, $symbol); + + if ($index != -1) + { + my $lineRemainder = substr($$lineRef, $index + length($symbol)); + $$lineRef = substr($$lineRef, 0, $index); + + return $lineRemainder; + } + else + { return undef; }; + }; + + +# +# Function: NormalizePrototype +# +# Normalizes a prototype. Specifically, condenses spaces, tabs, and line breaks into single spaces and removes leading and +# trailing ones. +# +# Parameters: +# +# prototype - The original prototype string. +# +# Returns: +# +# The normalized prototype. +# +sub NormalizePrototype #(prototype) + { + my ($self, $prototype) = @_; + + $prototype =~ tr/ \t\r\n/ /s; + $prototype =~ s/^ //; + $prototype =~ s/ $//; + + return $prototype; + }; + + +1; |