diff options
Diffstat (limited to 'docs/doctool/Modules/NaturalDocs/Languages/Perl.pm')
| -rw-r--r-- | docs/doctool/Modules/NaturalDocs/Languages/Perl.pm | 1338 |
1 files changed, 0 insertions, 1338 deletions
diff --git a/docs/doctool/Modules/NaturalDocs/Languages/Perl.pm b/docs/doctool/Modules/NaturalDocs/Languages/Perl.pm deleted file mode 100644 index ca9d24fc..00000000 --- a/docs/doctool/Modules/NaturalDocs/Languages/Perl.pm +++ /dev/null @@ -1,1338 +0,0 @@ -############################################################################### -# -# Class: NaturalDocs::Languages::Perl -# -############################################################################### -# -# A subclass to handle the language variations of Perl. -# -# -# Topic: Language Support -# -# Supported: -# -# - Packages -# - Inheritance via "use base" and "@ISA =". -# - Functions -# - Variables -# -# Not supported yet: -# -# - Constants -# -############################################################################### - -# This file is part of Natural Docs, which is Copyright (C) 2003-2005 Greg Valure -# Natural Docs is licensed under the GPL - -use strict; -use integer; - -package NaturalDocs::Languages::Perl; - -use base 'NaturalDocs::Languages::Advanced'; - - -# -# bool: inNDPOD -# Set whenever we're in ND POD in <PreprocessLine()>. -# -my $inNDPOD; - -# -# bool: mustBreakPOD -# Set whenever the next line needs to be prefixed with "(NDPODBREAK)" in <PreprocessLine()>. -# -my $mustBreakPOD; - -# -# array: hereDocTerminators -# An array of active Here Doc terminators, or an empty array if not active. Each entry is an arrayref of tokens. The entries -# must appear in the order they must appear in the source. -# -my @hereDocTerminators; - - - -############################################################################### -# Group: Interface Functions - - -# -# Function: PackageSeparator -# Returns the package separator symbol. -# -sub PackageSeparator - { return '::'; }; - -# -# Function: EnumValues -# Returns the <EnumValuesType> that describes how the language handles enums. -# -sub EnumValues - { return ::ENUM_GLOBAL(); }; - - -# -# Function: ParseFile -# -# Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>. -# -# Parameters: -# -# sourceFile - The name of the source file to parse. -# topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file. -# -# Returns: -# -# The array ( autoTopics, scopeRecord ). -# -# autoTopics - An arrayref of automatically generated topics from the file, or undef if none. -# scopeRecord - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChanges>, or undef if none. -# -sub ParseFile #(sourceFile, topicsList) - { - my ($self, $sourceFile, $topicsList) = @_; - - $inNDPOD = 0; - $mustBreakPOD = 0; - @hereDocTerminators = ( ); - - $self->ParseForCommentsAndTokens($sourceFile, [ '#' ], [ '=begin nd', '=end nd' ]); - - my $tokens = $self->Tokens(); - my $index = 0; - my $lineNumber = 1; - - while ($index < scalar @$tokens) - { - if ($self->TryToSkipWhitespace(\$index, \$lineNumber) || - $self->TryToGetPackage(\$index, \$lineNumber) || - $self->TryToGetBase(\$index, \$lineNumber) || - $self->TryToGetFunction(\$index, \$lineNumber) || - $self->TryToGetVariable(\$index, \$lineNumber) ) - { - # The functions above will handle everything. - } - - elsif ($tokens->[$index] eq '{') - { - $self->StartScope('}', $lineNumber, undef); - $index++; - } - - elsif ($tokens->[$index] eq '}') - { - if ($self->ClosingScopeSymbol() eq '}') - { $self->EndScope($lineNumber); }; - - $index++; - } - - elsif (lc($tokens->[$index]) eq 'eval') - { - # We want to skip the token in this case instead of letting it fall to SkipRestOfStatement. This allows evals with braces - # to be treated like normal floating braces. - $index++; - } - - else - { - $self->SkipRestOfStatement(\$index, \$lineNumber); - }; - }; - - - # Don't need to keep these around. - $self->ClearTokens(); - - return ( $self->AutoTopics(), $self->ScopeRecord() ); - }; - - -# -# Function: PreprocessLine -# -# Overridden to support "=begin nd" and similar. -# -# - "=begin [nd|naturaldocs|natural docs]" all translate to "=begin nd". -# - "=[nd|naturaldocs|natural docs]" also translate to "=begin nd". -# - "=end [nd|naturaldocs|natural docs]" all translate to "=end nd". -# - "=cut" from a ND block translates into "=end nd", but the next line will be altered to begin with "(NDPODBREAK)". This is -# so if there is POD leading into ND which ends with a cut, the parser can still end the original POD because the end ND line -# would have been removed. -# - "=pod begin nd" and "=pod end nd" are supported for compatibility with ND 1.32 and earlier, even though the syntax is a -# mistake. -# -sub PreprocessLine #(lineRef) - { - my ($self, $lineRef) = @_; - - if ($$lineRef =~ /^\=(?:(?:pod[ \t]+)?begin[ \t]+)?(?:nd|naturaldocs|natural[ \t]+docs)[ \t]*$/i) - { - $$lineRef = '=begin nd'; - $inNDPOD = 1; - $mustBreakPOD = 0; - } - elsif ($$lineRef =~ /^\=(?:pod[ \t]+)end[ \t]+(?:nd|naturaldocs|natural[ \t]+docs)[ \t]*$/i) - { - $$lineRef = '=end nd'; - $inNDPOD = 0; - $mustBreakPOD = 0; - } - elsif ($$lineRef =~ /^\=cut[ \t]*$/i) - { - if ($inNDPOD) - { - $$lineRef = '=end nd'; - $inNDPOD = 0; - $mustBreakPOD = 1; - }; - } - elsif ($mustBreakPOD) - { - $$lineRef = '(NDPODBREAK)' . $$lineRef; - $mustBreakPOD = 0; - }; - }; - - - -############################################################################### -# Group: Statement Parsing Functions -# All functions here assume that the current position is at the beginning of a statement. -# -# Note for developers: I am well aware that the code in these functions do not check if we're past the end of the tokens as -# often as it should. We're making use of the fact that Perl will always return undef in these cases to keep the code simpler. - - -# -# Function: TryToGetPackage -# -# Determines whether the position is at a package declaration statement, and if so, generates a topic for it, skips it, and -# returns true. -# -sub TryToGetPackage #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - if (lc($tokens->[$$indexRef]) eq 'package') - { - my $index = $$indexRef + 1; - my $lineNumber = $$lineNumberRef; - - if (!$self->TryToSkipWhitespace(\$index, \$lineNumber)) - { return undef; }; - - my $name; - - while ($tokens->[$index] =~ /^[a-z_\:]/i) - { - $name .= $tokens->[$index]; - $index++; - }; - - if (!defined $name) - { return undef; }; - - my $autoTopic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_CLASS(), $name, - undef, undef, - undef, - undef, undef, $$lineNumberRef); - $self->AddAutoTopic($autoTopic); - - NaturalDocs::Parser->OnClass($autoTopic->Symbol()); - - $self->SetPackage($autoTopic->Symbol(), $$lineNumberRef); - - $$indexRef = $index; - $$lineNumberRef = $lineNumber; - $self->SkipRestOfStatement($indexRef, $lineNumberRef); - - return 1; - }; - - return undef; - }; - - -# -# Function: TryToGetBase -# -# Determines whether the position is at a package base declaration statement, and if so, calls -# <NaturalDocs::Parser->OnClassParent()>. -# -# Supported Syntaxes: -# -# > use base [list of strings] -# > @ISA = [list of strings] -# > @[package]::ISA = [list of strings] -# > our @ISA = [list of strings] -# -sub TryToGetBase #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - my ($index, $lineNumber, $class, $parents); - - if (lc($tokens->[$$indexRef]) eq 'use') - { - $index = $$indexRef + 1; - $lineNumber = $$lineNumberRef; - - if (!$self->TryToSkipWhitespace(\$index, \$lineNumber) || - lc($tokens->[$index]) ne 'base') - { return undef; } - - $index++; - $self->TryToSkipWhitespace(\$index, \$lineNumber); - - $parents = $self->TryToGetListOfStrings(\$index, \$lineNumber); - } - - else - { - $index = $$indexRef; - $lineNumber = $$lineNumberRef; - - if (lc($tokens->[$index]) eq 'our') - { - $index++; - $self->TryToSkipWhitespace(\$index, \$lineNumber); - }; - - if ($tokens->[$index] eq '@') - { - $index++; - - while ($index < scalar @$tokens) - { - if ($tokens->[$index] eq 'ISA') - { - $index++; - $self->TryToSkipWhitespace(\$index, \$lineNumber); - - if ($tokens->[$index] eq '=') - { - $index++; - $self->TryToSkipWhitespace(\$index, \$lineNumber); - - $parents = $self->TryToGetListOfStrings(\$index, \$lineNumber); - } - else - { last; }; - } - - # If token isn't ISA... - elsif ($tokens->[$index] =~ /^[a-z0-9_:]/i) - { - $class .= $tokens->[$index]; - $index++; - } - else - { last; }; - }; - }; - }; - - if (defined $parents) - { - if (defined $class) - { - $class =~ s/::$//; - my @classIdentifiers = split(/::/, $class); - $class = NaturalDocs::SymbolString->Join(@classIdentifiers); - } - else - { $class = $self->CurrentScope(); }; - - foreach my $parent (@$parents) - { - my @parentIdentifiers = split(/::/, $parent); - my $parentSymbol = NaturalDocs::SymbolString->Join(@parentIdentifiers); - - NaturalDocs::Parser->OnClassParent($class, $parentSymbol, undef, undef, ::RESOLVE_ABSOLUTE()); - }; - - $$indexRef = $index; - $$lineNumberRef = $lineNumber; - $self->SkipRestOfStatement($indexRef, $lineNumberRef); - - return 1; - } - else - { return undef; }; - }; - - -# -# Function: TryToGetFunction -# -# Determines whether the position is at a function declaration statement, and if so, generates a topic for it, skips it, and -# returns true. -# -sub TryToGetFunction #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - if ( lc($tokens->[$$indexRef]) eq 'sub') - { - my $prototypeStart = $$indexRef; - my $prototypeStartLine = $$lineNumberRef; - my $prototypeEnd = $$indexRef + 1; - my $prototypeEndLine = $$lineNumberRef; - - if ( !$self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine) || - $tokens->[$prototypeEnd] !~ /^[a-z_]/i ) - { return undef; }; - - my $name = $tokens->[$prototypeEnd]; - $prototypeEnd++; - - # We parsed 'sub [name]'. Now keep going until we find a semicolon or a brace. - - for (;;) - { - if ($prototypeEnd >= scalar @$tokens) - { return undef; } - - # End if we find a semicolon, since it means we found a predeclaration rather than an actual function. - elsif ($tokens->[$prototypeEnd] eq ';') - { return undef; } - - elsif ($tokens->[$prototypeEnd] eq '{') - { - # Found it! - - my $prototype = $self->NormalizePrototype( $self->CreateString($prototypeStart, $prototypeEnd) ); - - $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FUNCTION(), $name, - $self->CurrentScope(), undef, - $prototype, - undef, undef, $prototypeStartLine)); - - $$indexRef = $prototypeEnd; - $$lineNumberRef = $prototypeEndLine; - - $self->SkipRestOfStatement($indexRef, $lineNumberRef); - - return 1; - } - - else - { $self->GenericSkip(\$prototypeEnd, \$prototypeEndLine, 0, 1); }; - }; - } - else - { return undef; }; - }; - - -# -# Function: TryToGetVariable -# -# Determines if the position is at a variable declaration statement, and if so, generates a topic for it, skips it, and returns -# true. -# -# Supported Syntaxes: -# -# - Supports variables declared with "my", "our", and "local". -# - Supports multiple declarations in one statement, such as "my ($x, $y);". -# - Supports types and attributes. -# -sub TryToGetVariable #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - my $firstToken = lc( $tokens->[$$indexRef] ); - - if ($firstToken eq 'my' || $firstToken eq 'our' || $firstToken eq 'local') - { - my $prototypeStart = $$indexRef; - my $prototypeStartLine = $$lineNumberRef; - my $prototypeEnd = $$indexRef + 1; - my $prototypeEndLine = $$lineNumberRef; - - $self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine); - - - # Get the type if present. - - my $type; - - if ($tokens->[$prototypeEnd] =~ /^[a-z\:]/i) - { - do - { - $type .= $tokens->[$prototypeEnd]; - $prototypeEnd++; - } - while ($tokens->[$prototypeEnd] =~ /^[a-z\:]/i); - - if (!$self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine)) - { return undef; }; - }; - - - # Get the name, or possibly names. - - if ($tokens->[$prototypeEnd] eq '(') - { - # If there's multiple variables, we'll need to build a custom prototype for each one. $firstToken already has the - # declaring word. We're going to store each name in @names, and we're going to use $prototypeStart and - # $prototypeEnd to capture any properties appearing after the list. - - my $name; - my @names; - my $hasComma = 0; - - $prototypeStart = $prototypeEnd + 1; - $prototypeStartLine = $prototypeEndLine; - - for (;;) - { - $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine); - - $name = $self->TryToGetVariableName(\$prototypeStart, \$prototypeStartLine); - - if (!defined $name) - { return undef; }; - - push @names, $name; - - $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine); - - # We can have multiple commas in a row. We can also have trailing commas. However, the parenthesis must - # not start with a comma or be empty, hence this logic does not appear earlier. - while ($tokens->[$prototypeStart] eq ',') - { - $prototypeStart++; - $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine); - - $hasComma = 1; - } - - if ($tokens->[$prototypeStart] eq ')') - { - $prototypeStart++; - last; - } - elsif (!$hasComma) - { return undef; }; - }; - - - # Now find the end of the prototype. - - $prototypeEnd = $prototypeStart; - $prototypeEndLine = $prototypeStartLine; - - while ($prototypeEnd < scalar @$tokens && - $tokens->[$prototypeEnd] !~ /^[\;\=]/) - { - $prototypeEnd++; - }; - - - my $prototypePrefix = $firstToken . ' '; - if (defined $type) - { $prototypePrefix .= $type . ' '; }; - - my $prototypeSuffix = ' ' . $self->CreateString($prototypeStart, $prototypeEnd); - - foreach $name (@names) - { - my $prototype = $self->NormalizePrototype( $prototypePrefix . $name . $prototypeSuffix ); - - $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_VARIABLE(), $name, - $self->CurrentScope(), undef, - $prototype, - undef, undef, $prototypeStartLine)); - }; - - $self->SkipRestOfStatement(\$prototypeEnd, \$prototypeEndLine); - - $$indexRef = $prototypeEnd; - $$lineNumberRef = $prototypeEndLine; - } - - else # no parenthesis - { - my $name = $self->TryToGetVariableName(\$prototypeEnd, \$prototypeEndLine); - - if (!defined $name) - { return undef; }; - - while ($prototypeEnd < scalar @$tokens && - $tokens->[$prototypeEnd] !~ /^[\;\=]/) - { - $prototypeEnd++; - }; - - my $prototype = $self->NormalizePrototype( $self->CreateString($prototypeStart, $prototypeEnd) ); - - $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_VARIABLE(), $name, - $self->CurrentScope(), undef, - $prototype, - undef, undef, $prototypeStartLine)); - - $self->SkipRestOfStatement(\$prototypeEnd, \$prototypeEndLine); - - $$indexRef = $prototypeEnd; - $$lineNumberRef = $prototypeEndLine; - }; - - return 1; - } - else - { return undef; }; - }; - - -# -# Function: TryToGetVariableName -# -# Determines if the position is at a variable name, and if so, skips it and returns the name. -# -sub TryToGetVariableName #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - my $name; - - if ($tokens->[$$indexRef] =~ /^[\$\@\%\*]/) - { - $name .= $tokens->[$$indexRef]; - $$indexRef++; - - $self->TryToSkipWhitespace($indexRef, $lineNumberRef); - - if ($tokens->[$$indexRef] =~ /^[a-z_]/i) - { - $name .= $tokens->[$$indexRef]; - $$indexRef++; - } - else - { return undef; }; - }; - - return $name; - }; - - -# -# Function: TryToGetListOfStrings -# -# Attempts to retrieve a list of strings from the current position. Returns an arrayref of them if any are found, or undef if none. -# It stops the moment it reaches a non-string, so "string1, variable, string2" will only return string1. -# -# Supported Syntaxes: -# -# - Supports parenthesis. -# - Supports all string forms supported by <TryToSkipString()>. -# - Supports qw() string arrays. -# -sub TryToGetListOfStrings #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - my $parenthesis = 0; - my $strings; - - while ($$indexRef < scalar @$tokens) - { - # We'll tolerate parenthesis. - if ($tokens->[$$indexRef] eq '(') - { - $$indexRef++; - $parenthesis++; - } - elsif ($tokens->[$$indexRef] eq ')') - { - if ($parenthesis == 0) - { last; }; - - $$indexRef++; - $parenthesis--; - } - elsif ($tokens->[$$indexRef] eq ',') - { - $$indexRef++; - } - else - { - my ($startContent, $endContent); - my $symbolIndex = $$indexRef; - - if ($self->TryToSkipString($indexRef, $lineNumberRef, \$startContent, \$endContent)) - { - my $content = $self->CreateString($startContent, $endContent); - - if (!defined $strings) - { $strings = [ ]; }; - - if (lc($tokens->[$symbolIndex]) eq 'qw') - { - $content =~ tr/ \t\n/ /s; - $content =~ s/^ //; - - my @qwStrings = split(/ /, $content); - - push @$strings, @qwStrings; - } - else - { - push @$strings, $content; - }; - } - else - { last; }; - }; - - $self->TryToSkipWhitespace($indexRef, $lineNumberRef); - }; - - return $strings; - }; - - -############################################################################### -# Group: Low Level Parsing Functions - - -# -# Function: GenericSkip -# -# Advances the position one place through general code. -# -# - If the position is on a comment or string, it will skip it completely. -# - If the position is on an opening symbol, it will skip until the past the closing symbol. -# - If the position is on a regexp or quote-like operator, it will skip it completely. -# - If the position is on a backslash, it will skip it and the following token. -# - If the position is on whitespace (including comments), it will skip it completely. -# - Otherwise it skips one token. -# -# Parameters: -# -# indexRef - A reference to the current index. -# lineNumberRef - A reference to the current line number. -# noRegExps - If set, does not test for regular expressions. -# allowStringedClosingParens - If set, allows $) to end a parenthesis set. -# -sub GenericSkip #(indexRef, lineNumberRef, noRegExps, allowStringedClosingParens) - { - my ($self, $indexRef, $lineNumberRef, $noRegExps, $allowStringedClosingParens) = @_; - my $tokens = $self->Tokens(); - - if ($tokens->[$$indexRef] eq "\\" && $$indexRef + 1 < scalar @$tokens && $tokens->[$$indexRef+1] ne "\n") - { $$indexRef += 2; } - - # Note that we don't want to count backslashed ()[]{} since they could be in regexps. Also, ()[] are valid variable names - # when preceded by a string. - - # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway. - elsif ($tokens->[$$indexRef] eq '{' && !$self->IsBackslashed($$indexRef)) - { - $$indexRef++; - $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}', $noRegExps, $allowStringedClosingParens); - } - elsif ($tokens->[$$indexRef] eq '(' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef)) - { - $$indexRef++; - - do - { $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ')', $noRegExps, $allowStringedClosingParens); } - while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1) && !$allowStringedClosingParens); - } - elsif ($tokens->[$$indexRef] eq '[' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef)) - { - $$indexRef++; - - do - { $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ']', $noRegExps, $allowStringedClosingParens); } - while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1)); - } - - elsif ($self->TryToSkipWhitespace($indexRef, $lineNumberRef) || - $self->TryToSkipString($indexRef, $lineNumberRef) || - $self->TryToSkipHereDocDeclaration($indexRef, $lineNumberRef) || - (!$noRegExps && $self->TryToSkipRegexp($indexRef, $lineNumberRef) ) ) - { - } - - else - { $$indexRef++; }; - }; - - -# -# Function: GenericSkipUntilAfter -# -# Advances the position via <GenericSkip()> until a specific token is reached and passed. -# -sub GenericSkipUntilAfter #(indexRef, lineNumberRef, token, noRegExps, allowStringedClosingParens) - { - my ($self, $indexRef, $lineNumberRef, $token, $noRegExps, $allowStringedClosingParens) = @_; - my $tokens = $self->Tokens(); - - while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token) - { $self->GenericSkip($indexRef, $lineNumberRef, $noRegExps, $allowStringedClosingParens); }; - - if ($tokens->[$$indexRef] eq "\n") - { $$lineNumberRef++; }; - $$indexRef++; - }; - - -# -# Function: GenericRegexpSkip -# -# Advances the position one place through regexp code. -# -# - If the position is on an opening symbol, it will skip until the past the closing symbol. -# - If the position is on a backslash, it will skip it and the following token. -# - If the position is on whitespace (not including comments), it will skip it completely. -# - Otherwise it skips one token. -# -# Also differs from <GenericSkip()> in that the parenthesis in $( and $) do count against the scope, where they wouldn't -# normally. -# -# Parameters: -# -# indexRef - A reference to the current index. -# lineNumberRef - A reference to the current line number. -# inBrackets - Whether we're in brackets or not. If true, we don't care about matching braces and parenthesis. -# -sub GenericRegexpSkip #(indexRef, lineNumberRef, inBrackets) - { - my ($self, $indexRef, $lineNumberRef, $inBrackets) = @_; - my $tokens = $self->Tokens(); - - if ($tokens->[$$indexRef] eq "\\" && $$indexRef + 1 < scalar @$tokens && $tokens->[$$indexRef+1] ne "\n") - { $$indexRef += 2; } - - # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway. - elsif ($tokens->[$$indexRef] eq '{' && !$self->IsBackslashed($$indexRef) && !$inBrackets) - { - $$indexRef++; - $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, '}'); - } - elsif ($tokens->[$$indexRef] eq '(' && !$self->IsBackslashed($$indexRef) && !$inBrackets) - { - $$indexRef++; - $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, ')'); - } - elsif ($tokens->[$$indexRef] eq '[' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef)) - { - $$indexRef++; - - do - { $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, ']'); } - while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1)); - } - - elsif ($tokens->[$$indexRef] eq "\n") - { - $$lineNumberRef++; - $$indexRef++; - } - - else - { $$indexRef++; }; - }; - - -# -# Function: GenericRegexpSkipUntilAfter -# -# Advances the position via <GenericRegexpSkip()> until a specific token is reached and passed. -# -sub GenericRegexpSkipUntilAfter #(indexRef, lineNumberRef, token) - { - my ($self, $indexRef, $lineNumberRef, $token) = @_; - my $tokens = $self->Tokens(); - - my $inBrackets = ( $token eq ']' ); - - while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token) - { $self->GenericRegexpSkip($indexRef, $lineNumberRef, $inBrackets); }; - - if ($tokens->[$$indexRef] eq "\n") - { $$lineNumberRef++; }; - $$indexRef++; - }; - - -# -# Function: SkipRestOfStatement -# -# Advances the position via <GenericSkip()> until after the end of the current statement, which is defined as a semicolon or -# a brace group. Of course, either of those appearing inside parenthesis, a nested brace group, etc. don't count. -# -sub SkipRestOfStatement #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - while ($$indexRef < scalar @$tokens && - $tokens->[$$indexRef] ne ';' && - !($tokens->[$$indexRef] eq '{' && !$self->IsStringed($$indexRef)) ) - { - $self->GenericSkip($indexRef, $lineNumberRef); - }; - - if ($tokens->[$$indexRef] eq ';') - { $$indexRef++; } - elsif ($tokens->[$$indexRef] eq '{') - { $self->GenericSkip($indexRef, $lineNumberRef); }; - }; - - -# -# Function: TryToSkipWhitespace -# -# If the current position is on whitespace it skips them and returns true. If there are a number of these in a row, it skips them -# all. -# -# Supported Syntax: -# -# - Whitespace -# - Line break -# - All comment forms supported by <TryToSkipComment()> -# - Here Doc content -# -sub TryToSkipWhitespace #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - my $result; - - while ($$indexRef < scalar @$tokens) - { - if ($self->TryToSkipHereDocContent($indexRef, $lineNumberRef) || - $self->TryToSkipComment($indexRef, $lineNumberRef)) - { - $result = 1; - } - elsif ($tokens->[$$indexRef] =~ /^[ \t]/) - { - $$indexRef++; - $result = 1; - } - elsif ($tokens->[$$indexRef] eq "\n") - { - $$indexRef++; - $$lineNumberRef++; - $result = 1; - } - else - { last; }; - }; - - return $result; - }; - - -# -# Function: TryToSkipComment -# If the current position is on a comment, skip past it and return true. -# -sub TryToSkipComment #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - - return ( $self->TryToSkipLineComment($indexRef, $lineNumberRef) || - $self->TryToSkipPODComment($indexRef, $lineNumberRef) ); - }; - - -# -# Function: TryToSkipLineComment -# If the current position is on a line comment symbol, skip past it and return true. -# -sub TryToSkipLineComment #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - # Note that $#var is not a comment. - if ($tokens->[$$indexRef] eq '#' && !$self->IsStringed($$indexRef)) - { - $self->SkipRestOfLine($indexRef, $lineNumberRef); - return 1; - } - else - { return undef; }; - }; - - -# -# Function: TryToSkipPODComment -# If the current position is on a POD comment symbol, skip past it and return true. -# -sub TryToSkipPODComment #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - # Note that whitespace is not allowed before the equals sign. It must directly start a line. - if ($tokens->[$$indexRef] eq '=' && - ( $$indexRef == 0 || $tokens->[$$indexRef - 1] eq "\n" ) && - $tokens->[$$indexRef + 1] =~ /^[a-z]/i ) - { - # Skip until =cut. Note that it's theoretically possible for =cut to appear without a prior POD directive. - - do - { - if ($tokens->[$$indexRef] eq '=' && lc( $tokens->[$$indexRef + 1] ) eq 'cut') - { - $self->SkipRestOfLine($indexRef, $lineNumberRef); - last; - } - elsif ($tokens->[$$indexRef] eq '(' && $$indexRef + 2 < scalar @$tokens && - $tokens->[$$indexRef+1] eq 'NDPODBREAK' && $tokens->[$$indexRef+2] eq ')') - { - $$indexRef += 3; - last; - } - else - { - $self->SkipRestOfLine($indexRef, $lineNumberRef); - }; - } - while ($$indexRef < scalar @$tokens); - - return 1; - } - else - { return undef; }; - }; - - -# -# Function: TryToSkipString -# If the current position is on a string delimiter, skip past the string and return true. -# -# Parameters: -# -# indexRef - A reference to the index of the position to start at. -# lineNumberRef - A reference to the line number of the position. -# startContentIndexRef - A reference to the variable in which to store the index of the first content token. May be undef. -# endContentIndexRef - A reference to the variable in which to store the index of the end of the content, which is one past -# the last content token. may be undef. -# -# Returns: -# -# Whether the position was at a string. The index, line number, and content index variabls will only be changed if true. -# -# Syntax Support: -# -# - Supports quotes, apostrophes, backticks, q(), qq(), qx(), and qw(). -# - All symbols are supported for the letter forms. -# -sub TryToSkipString #(indexRef, lineNumberRef, startContentIndexRef, endContentIndexRef) - { - my ($self, $indexRef, $lineNumberRef, $startContentIndexRef, $endContentIndexRef) = @_; - my $tokens = $self->Tokens(); - - # The three string delimiters. All three are Perl variables when preceded by a dollar sign. - if (!$self->IsStringed($$indexRef) && - ( $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '\'', '\'', $startContentIndexRef, $endContentIndexRef) || - $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '"', '"', $startContentIndexRef, $endContentIndexRef) || - $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '`', '`', $startContentIndexRef, $endContentIndexRef) ) ) - { - return 1; - } - elsif ($tokens->[$$indexRef] =~ /^(?:q|qq|qx|qw)$/i && - ($$indexRef == 0 || $tokens->[$$indexRef - 1] !~ /^[\$\%\@\*]$/)) - { - $$indexRef++; - - $self->TryToSkipWhitespace($indexRef, $lineNumberRef); - - my $openingSymbol = $tokens->[$$indexRef]; - my $closingSymbol; - - if ($openingSymbol eq '{') - { $closingSymbol = '}'; } - elsif ($openingSymbol eq '(') - { $closingSymbol = ')'; } - elsif ($openingSymbol eq '[') - { $closingSymbol = ']'; } - elsif ($openingSymbol eq '<') - { $closingSymbol = '>'; } - else - { $closingSymbol = $openingSymbol; }; - - $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, $openingSymbol, $closingSymbol, - $startContentIndexRef, $endContentIndexRef); - - return 1; - } - else - { return undef; }; - }; - - -# -# Function: TryToSkipHereDocDeclaration -# -# If the current position is on a Here Doc declaration, add its terminators to <hereDocTerminators> and skip it. -# -# Syntax Support: -# -# - Supports <<EOF -# - Supports << "String" with all string forms supported by <TryToSkipString()>. -# -sub TryToSkipHereDocDeclaration #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - my $index = $$indexRef; - my $lineNumber = $$lineNumberRef; - - if ($tokens->[$index] eq '<' && $tokens->[$index + 1] eq '<') - { - $index += 2; - my $success; - - # No whitespace allowed with the bare word. - if ($tokens->[$index] eq 'EOF') - { - push @hereDocTerminators, [ 'EOF' ]; - $index++; - $success = 1; - } - else - { - $self->TryToSkipWhitespace(\$index, \$lineNumber); - - my ($contentStart, $contentEnd); - if ($self->TryToSkipString(\$index, \$lineNumber, \$contentStart, \$contentEnd)) - { - push @hereDocTerminators, [ @{$tokens}[$contentStart..$contentEnd - 1] ]; - $success = 1; - }; - }; - - if ($success) - { - $$indexRef = $index; - $$lineNumberRef = $lineNumber; - - return 1; - }; - }; - - return 0; - }; - - -# -# Function: TryToSkipHereDocContent -# -# If the current position is at the beginning of a line and there are entries in <hereDocTerminators>, skips lines until all the -# terminators are exhausted or we reach the end of the file. -# -# Returns: -# -# Whether the position was on Here Doc content. -# -sub TryToSkipHereDocContent #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - # We don't use IsFirstLineToken() because it really needs to be the first line token. Whitespace is not allowed. - if ($$indexRef > 0 && $tokens->[$$indexRef - 1] eq "\n") - { - my $success = (scalar @hereDocTerminators > 0); - - while (scalar @hereDocTerminators && $$indexRef < scalar @$tokens) - { - my $terminatorIndex = 0; - - while ($hereDocTerminators[0]->[$terminatorIndex] eq $tokens->[$$indexRef]) - { - $terminatorIndex++; - $$indexRef++; - }; - - if ($terminatorIndex == scalar @{$hereDocTerminators[0]} && - ($tokens->[$$indexRef] eq "\n" || ($tokens->[$$indexRef] =~ /^[ \t]/ && $tokens->[$$indexRef + 1] eq "\n")) ) - { - shift @hereDocTerminators; - $$indexRef++; - $$lineNumberRef++; - } - else - { $self->SkipRestOfLine($indexRef, $lineNumberRef); }; - }; - - return $success; - } - - else - { return 0; }; - }; - - -# -# Function: TryToSkipRegexp -# If the current position is on a regular expression or a quote-like operator, skip past it and return true. -# -# Syntax Support: -# -# - Supports //, ??, m//, qr//, s///, tr///, and y///. -# - All symbols are supported for the letter forms. -# -sub TryToSkipRegexp #(indexRef, lineNumberRef) - { - my ($self, $indexRef, $lineNumberRef) = @_; - my $tokens = $self->Tokens(); - - my $isRegexp; - - if ($tokens->[$$indexRef] =~ /^(?:m|qr|s|tr|y|)$/i && - ($$indexRef == 0 || $tokens->[$$indexRef - 1] !~ /^[\$\%\@\*\-]$/) ) - { $isRegexp = 1; } - elsif ( ($tokens->[$$indexRef] eq '/' || $tokens->[$$indexRef] eq '?') && !$self->IsStringed($$indexRef) ) - { - my $index = $$indexRef - 1; - - while ($index >= 0 && $tokens->[$index] =~ /^(?: |\t|\n)/) - { $index--; }; - - if ($index < 0 || $tokens->[$index] !~ /^[a-zA-Z0-9_\)\]\}\'\"\`]/) - { $isRegexp = 1; }; - }; - - if ($isRegexp) - { - my $operator = lc($tokens->[$$indexRef]); - my $index = $$indexRef; - my $lineNumber = $$lineNumberRef; - - if ($operator =~ /^[\?\/]/) - { $operator = 'm'; } - else - { - $index++; - - # Believe it or not, s#...# is allowed. We can't pass over number signs here. - if ($tokens->[$index] ne '#') - { $self->TryToSkipWhitespace(\$index, \$lineNumber); }; - }; - - if ($tokens->[$index] =~ /^\w/) - { return undef; }; - - my $openingSymbol = $tokens->[$index]; - my $closingSymbol; - - if ($openingSymbol eq '{') - { $closingSymbol = '}'; } - elsif ($openingSymbol eq '(') - { $closingSymbol = ')'; } - elsif ($openingSymbol eq '[') - { $closingSymbol = ']'; } - elsif ($openingSymbol eq '<') - { $closingSymbol = '>'; } - else - { $closingSymbol = $openingSymbol; }; - - $index++; - - $self->GenericRegexpSkipUntilAfter(\$index, \$lineNumber, $closingSymbol); - - $$indexRef = $index; - $$lineNumberRef = $lineNumber; - - if ($operator =~ /^(?:s|tr|y)$/) - { - if ($openingSymbol ne $closingSymbol) - { - $self->TryToSkipWhitespace($indexRef, $lineNumberRef); - - $openingSymbol = $tokens->[$index]; - - if ($openingSymbol eq '{') - { $closingSymbol = '}'; } - elsif ($openingSymbol eq '(') - { $closingSymbol = ')'; } - elsif ($openingSymbol eq '[') - { $closingSymbol = ']'; } - elsif ($openingSymbol eq '<') - { $closingSymbol = '>'; } - else - { $closingSymbol = $openingSymbol; }; - - $$indexRef++; - }; - - if ($operator eq 's') - { - $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, $closingSymbol, 1); - } - else # ($operator eq 'tr' || $operator eq 'y') - { - while ($$indexRef < scalar @$tokens && - ($tokens->[$$indexRef] ne $closingSymbol || $self->IsBackslashed($$indexRef)) ) - { - if ($tokens->[$$indexRef] eq "\n") - { $$lineNumberRef++; }; - $$indexRef++; - }; - - $$indexRef++; - }; - }; - - # We want to skip any letters after the regexp. Otherwise something like tr/a/b/s; could have the trailing s; interpreted - # as another regexp. Whitespace is not allowed between the closing symbol and the letters. - - if ($tokens->[$$indexRef] =~ /^[a-z]/i) - { $$indexRef++; }; - - return 1; - }; - - return undef; - }; - - - -############################################################################### -# Group: Support Functions - - -# -# Function: IsStringed -# -# Returns whether the position is after a string (dollar sign) character. -# -# Parameters: -# -# index - The index of the postition. -# -sub IsStringed #(index) - { - my ($self, $index) = @_; - my $tokens = $self->Tokens(); - - if ($index > 0 && $tokens->[$index - 1] eq '$') - { return 1; } - else - { return undef; }; - }; - - -1; |