added doc tool

author: Magnus Auvinen <magnus.auvinen@gmail.com> 2008-08-02 08:21:29 +0000
committer: Magnus Auvinen <magnus.auvinen@gmail.com> 2008-08-02 08:21:29 +0000
commit: 61bfe2d70cae6be8c4086a210a5451135ccca9ea (patch)
tree: 62bf7808b1b2bfe5f56fe1e329871fb0991d0687 /docs/tool/Modules/NaturalDocs/Languages/Perl.pm
parent: a13b94f9e0bca8ea892311d9d9e0c0bc48616ea7 (diff)
download: zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.tar.gz
zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.zip
1 files changed, 1370 insertions, 0 deletions
diff --git a/docs/tool/Modules/NaturalDocs/Languages/Perl.pm b/docs/tool/Modules/NaturalDocs/Languages/Perl.pm
new file mode 100644
index 00000000..8817aadc
--- /dev/null
+++ b/docs/tool/Modules/NaturalDocs/Languages/Perl.pm
@@ -0,0 +1,1370 @@
+###############################################################################
+#
+#   Class: NaturalDocs::Languages::Perl
+#
+###############################################################################
+#
+#   A subclass to handle the language variations of Perl.
+#
+#
+#   Topic: Language Support
+#
+#       Supported:
+#
+#       - Packages
+#       - Inheritance via "use base" and "@ISA =".
+#       - Functions
+#       - Variables
+#
+#       Not supported yet:
+#
+#       - Constants
+#
+###############################################################################
+
+# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
+# Natural Docs is licensed under the GPL
+
+use strict;
+use integer;
+
+package NaturalDocs::Languages::Perl;
+
+use base 'NaturalDocs::Languages::Advanced';
+
+
+#
+#   array: hereDocTerminators
+#   An array of active Here Doc terminators, or an empty array if not active.  Each entry is an arrayref of tokens.  The entries
+#   must appear in the order they must appear in the source.
+#
+my @hereDocTerminators;
+
+
+
+###############################################################################
+# Group: Interface Functions
+
+
+#
+#   Function: PackageSeparator
+#   Returns the package separator symbol.
+#
+sub PackageSeparator
+    {  return '::';  };
+
+#
+#   Function: EnumValues
+#   Returns the <EnumValuesType> that describes how the language handles enums.
+#
+sub EnumValues
+    {  return ::ENUM_GLOBAL();  };
+
+
+#
+#   Function: ParseFile
+#
+#   Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>.
+#
+#   Parameters:
+#
+#       sourceFile - The name of the source file to parse.
+#       topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
+#
+#   Returns:
+#
+#       The array ( autoTopics, scopeRecord ).
+#
+#       autoTopics - An arrayref of automatically generated topics from the file, or undef if none.
+#       scopeRecord - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChanges>, or undef if none.
+#
+sub ParseFile #(sourceFile, topicsList)
+    {
+    my ($self, $sourceFile, $topicsList) = @_;
+
+    @hereDocTerminators = ( );
+
+    # The regular block comment symbols are undef because they're all potentially JavaDoc comments.  PreprocessFile() will
+    # handle translating things like =begin naturaldocs and =begin javadoc to =begin nd.
+    $self->ParseForCommentsAndTokens($sourceFile, [ '#' ], undef, [ '##' ], [ '=begin nd', '=end nd' ]);
+
+    my $tokens = $self->Tokens();
+    my $index = 0;
+    my $lineNumber = 1;
+
+    while ($index < scalar @$tokens)
+        {
+        if ($self->TryToSkipWhitespace(\$index, \$lineNumber) ||
+            $self->TryToGetPackage(\$index, \$lineNumber) ||
+            $self->TryToGetBase(\$index, \$lineNumber) ||
+            $self->TryToGetFunction(\$index, \$lineNumber) ||
+            $self->TryToGetVariable(\$index, \$lineNumber) )
+            {
+            # The functions above will handle everything.
+            }
+
+        elsif ($tokens->[$index] eq '{')
+            {
+            $self->StartScope('}', $lineNumber, undef);
+            $index++;
+            }
+
+        elsif ($tokens->[$index] eq '}')
+            {
+            if ($self->ClosingScopeSymbol() eq '}')
+                {  $self->EndScope($lineNumber);  };
+
+            $index++;
+            }
+
+        elsif (lc($tokens->[$index]) eq 'eval')
+            {
+            # We want to skip the token in this case instead of letting it fall to SkipRestOfStatement.  This allows evals with braces
+            # to be treated like normal floating braces.
+            $index++;
+            }
+
+        else
+            {
+            $self->SkipRestOfStatement(\$index, \$lineNumber);
+            };
+        };
+
+
+    # Don't need to keep these around.
+    $self->ClearTokens();
+
+    return ( $self->AutoTopics(), $self->ScopeRecord() );
+    };
+
+
+#
+#   Function: PreprocessFile
+#
+#   Overridden to support "=begin nd" and similar.
+#
+#   - "=begin [nd|naturaldocs|natural docs|jd|javadoc|java doc]" all translate to "=begin nd".
+#   - "=[nd|naturaldocs|natural docs]" also translate to "=begin nd".
+#   - "=end [nd|naturaldocs|natural docs|jd|javadoc]" all translate to "=end nd".
+#   - "=cut" from a ND block translates into "=end nd", but the next line will be altered to begin with "(NDPODBREAK)".  This is
+#     so if there is POD leading into ND which ends with a cut, the parser can still end the original POD because the end ND line
+#     would have been removed.  Remember, <NaturalDocs::Languages::Advanced->ParseForCommentsAndTokens()> removes
+#     Natural Docs-worthy comments to save parsing time.
+#   - "=pod begin nd" and "=pod end nd" are supported for compatibility with ND 1.32 and earlier, even though the syntax is a
+#     mistake.
+#   - It also supports the wrong plural forms, so naturaldoc/natural doc/javadocs/java docs will work.
+#
+sub PreprocessFile #(lines)
+    {
+    my ($self, $lines) = @_;
+
+    my $inNDPOD = 0;
+    my $mustBreakPOD = 0;
+
+    for (my $i = 0; $i < scalar @$lines; $i++)
+        {
+        if ($lines->[$i] =~ /^\=(?:(?:pod[ \t]+)?begin[ \t]+)?(?:nd|natural[ \t]*docs?|jd|java[ \t]*docs?)[ \t]*$/i)
+            {
+            $lines->[$i] = '=begin nd';
+            $inNDPOD = 1;
+            $mustBreakPOD = 0;
+            }
+        elsif ($lines->[$i] =~ /^\=(?:pod[ \t]+)end[ \t]+(?:nd|natural[ \t]*docs?|jd|javadocs?)[ \t]*$/i)
+            {
+            $lines->[$i] = '=end nd';
+            $inNDPOD = 0;
+            $mustBreakPOD = 0;
+            }
+        elsif ($lines->[$i] =~ /^\=cut[ \t]*$/i)
+            {
+            if ($inNDPOD)
+                {
+                $lines->[$i] = '=end nd';
+                $inNDPOD = 0;
+                $mustBreakPOD = 1;
+                };
+            }
+        elsif ($mustBreakPOD)
+            {
+            $lines->[$i] = '(NDPODBREAK)' . $lines->[$i];
+            $mustBreakPOD = 0;
+            };
+        };
+    };
+
+
+
+###############################################################################
+# Group: Statement Parsing Functions
+# All functions here assume that the current position is at the beginning of a statement.
+#
+# Note for developers: I am well aware that the code in these functions do not check if we're past the end of the tokens as
+# often as it should.  We're making use of the fact that Perl will always return undef in these cases to keep the code simpler.
+
+
+#
+#   Function: TryToGetPackage
+#
+#   Determines whether the position is at a package declaration statement, and if so, generates a topic for it, skips it, and
+#   returns true.
+#
+sub TryToGetPackage #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    if (lc($tokens->[$$indexRef]) eq 'package')
+        {
+        my $index = $$indexRef + 1;
+        my $lineNumber = $$lineNumberRef;
+
+        if (!$self->TryToSkipWhitespace(\$index, \$lineNumber))
+            {  return undef;  };
+
+        my $name;
+
+        while ($tokens->[$index] =~ /^[a-z_\:]/i)
+            {
+            $name .= $tokens->[$index];
+            $index++;
+            };
+
+        if (!defined $name)
+            {  return undef;  };
+
+        my $autoTopic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_CLASS(), $name,
+                                                                                             undef, undef,
+                                                                                             undef,
+                                                                                             undef, undef, $$lineNumberRef);
+        $self->AddAutoTopic($autoTopic);
+
+        NaturalDocs::Parser->OnClass($autoTopic->Symbol());
+
+        $self->SetPackage($autoTopic->Symbol(), $$lineNumberRef);
+
+        $$indexRef = $index;
+        $$lineNumberRef = $lineNumber;
+        $self->SkipRestOfStatement($indexRef, $lineNumberRef);
+
+        return 1;
+        };
+
+    return undef;
+    };
+
+
+#
+#   Function: TryToGetBase
+#
+#   Determines whether the position is at a package base declaration statement, and if so, calls
+#   <NaturalDocs::Parser->OnClassParent()>.
+#
+#   Supported Syntaxes:
+#
+#   > use base [list of strings]
+#   > @ISA = [list of strings]
+#   > @[package]::ISA = [list of strings]
+#   > our @ISA = [list of strings]
+#
+sub TryToGetBase #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    my ($index, $lineNumber, $class, $parents);
+
+    if (lc($tokens->[$$indexRef]) eq 'use')
+        {
+        $index = $$indexRef + 1;
+        $lineNumber = $$lineNumberRef;
+
+        if (!$self->TryToSkipWhitespace(\$index, \$lineNumber) ||
+           lc($tokens->[$index]) ne 'base')
+            {  return undef;  }
+
+        $index++;
+        $self->TryToSkipWhitespace(\$index, \$lineNumber);
+
+        $parents = $self->TryToGetListOfStrings(\$index, \$lineNumber);
+        }
+
+    else
+        {
+        $index = $$indexRef;
+        $lineNumber = $$lineNumberRef;
+
+        if (lc($tokens->[$index]) eq 'our')
+            {
+            $index++;
+            $self->TryToSkipWhitespace(\$index, \$lineNumber);
+            };
+
+        if ($tokens->[$index] eq '@')
+            {
+            $index++;
+
+            while ($index < scalar @$tokens)
+                {
+                if ($tokens->[$index] eq 'ISA')
+                    {
+                    $index++;
+                    $self->TryToSkipWhitespace(\$index, \$lineNumber);
+
+                    if ($tokens->[$index] eq '=')
+                        {
+                        $index++;
+                        $self->TryToSkipWhitespace(\$index, \$lineNumber);
+
+                        $parents = $self->TryToGetListOfStrings(\$index, \$lineNumber);
+                        }
+                    else
+                        {  last;  };
+                    }
+
+                # If token isn't ISA...
+                elsif ($tokens->[$index] =~ /^[a-z0-9_:]/i)
+                    {
+                    $class .= $tokens->[$index];
+                    $index++;
+                    }
+                else
+                    {  last;  };
+                };
+            };
+        };
+
+    if (defined $parents)
+        {
+        if (defined $class)
+            {
+            $class =~ s/::$//;
+            my @classIdentifiers = split(/::/, $class);
+            $class = NaturalDocs::SymbolString->Join(@classIdentifiers);
+            }
+        else
+            {  $class = $self->CurrentScope();  };
+
+        foreach my $parent (@$parents)
+            {
+            my @parentIdentifiers = split(/::/, $parent);
+            my $parentSymbol = NaturalDocs::SymbolString->Join(@parentIdentifiers);
+
+            NaturalDocs::Parser->OnClassParent($class, $parentSymbol, undef, undef, ::RESOLVE_ABSOLUTE());
+            };
+
+        $$indexRef = $index;
+        $$lineNumberRef = $lineNumber;
+        $self->SkipRestOfStatement($indexRef, $lineNumberRef);
+
+        return 1;
+        }
+    else
+        {  return undef;  };
+    };
+
+
+#
+#   Function: TryToGetFunction
+#
+#   Determines whether the position is at a function declaration statement, and if so, generates a topic for it, skips it, and
+#   returns true.
+#
+sub TryToGetFunction #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    if ( lc($tokens->[$$indexRef]) eq 'sub')
+        {
+        my $prototypeStart = $$indexRef;
+        my $prototypeStartLine = $$lineNumberRef;
+        my $prototypeEnd = $$indexRef + 1;
+        my $prototypeEndLine = $$lineNumberRef;
+
+        if ( !$self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine) ||
+             $tokens->[$prototypeEnd] !~ /^[a-z_]/i )
+            {  return undef;  };
+
+        my $name = $tokens->[$prototypeEnd];
+        $prototypeEnd++;
+
+        # We parsed 'sub [name]'.  Now keep going until we find a semicolon or a brace.
+
+        for (;;)
+            {
+            if ($prototypeEnd >= scalar @$tokens)
+                {  return undef;  }
+
+            # End if we find a semicolon, since it means we found a predeclaration rather than an actual function.
+            elsif ($tokens->[$prototypeEnd] eq ';')
+                {  return undef;  }
+
+            elsif ($tokens->[$prototypeEnd] eq '{')
+                {
+                # Found it!
+
+                my $prototype = $self->NormalizePrototype( $self->CreateString($prototypeStart, $prototypeEnd) );
+
+                $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FUNCTION(), $name,
+                                                                                                          $self->CurrentScope(), undef,
+                                                                                                          $prototype,
+                                                                                                          undef, undef, $prototypeStartLine));
+
+                $$indexRef = $prototypeEnd;
+                $$lineNumberRef = $prototypeEndLine;
+
+                $self->SkipRestOfStatement($indexRef, $lineNumberRef);
+
+                return 1;
+                }
+
+            else
+                {  $self->GenericSkip(\$prototypeEnd, \$prototypeEndLine, 0, 1);  };
+            };
+        }
+    else
+        {  return undef;  };
+    };
+
+
+#
+#   Function: TryToGetVariable
+#
+#   Determines if the position is at a variable declaration statement, and if so, generates a topic for it, skips it, and returns
+#   true.
+#
+#   Supported Syntaxes:
+#
+#   - Supports variables declared with "my", "our", and "local".
+#   - Supports multiple declarations in one statement, such as "my ($x, $y);".
+#   - Supports types and attributes.
+#
+sub TryToGetVariable #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    my $firstToken = lc( $tokens->[$$indexRef] );
+
+    if ($firstToken eq 'my' || $firstToken eq 'our' || $firstToken eq 'local')
+        {
+        my $prototypeStart = $$indexRef;
+        my $prototypeStartLine = $$lineNumberRef;
+        my $prototypeEnd = $$indexRef + 1;
+        my $prototypeEndLine = $$lineNumberRef;
+
+        $self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine);
+
+
+        # Get the type if present.
+
+        my $type;
+
+        if ($tokens->[$prototypeEnd] =~ /^[a-z\:]/i)
+            {
+            do
+                {
+                $type .= $tokens->[$prototypeEnd];
+                $prototypeEnd++;
+                }
+            while ($tokens->[$prototypeEnd] =~ /^[a-z\:]/i);
+
+            if (!$self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine))
+                {  return undef;  };
+            };
+
+
+        # Get the name, or possibly names.
+
+        if ($tokens->[$prototypeEnd] eq '(')
+            {
+            # If there's multiple variables, we'll need to build a custom prototype for each one.  $firstToken already has the
+            # declaring word.  We're going to store each name in @names, and we're going to use $prototypeStart and
+            # $prototypeEnd to capture any properties appearing after the list.
+
+            my $name;
+            my @names;
+            my $hasComma = 0;
+
+            $prototypeStart = $prototypeEnd + 1;
+            $prototypeStartLine = $prototypeEndLine;
+
+            for (;;)
+                {
+                $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine);
+
+                $name = $self->TryToGetVariableName(\$prototypeStart, \$prototypeStartLine);
+
+                if (!defined $name)
+                    {  return undef;  };
+
+                push @names, $name;
+
+                $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine);
+
+                # We can have multiple commas in a row.  We can also have trailing commas.  However, the parenthesis must
+                # not start with a comma or be empty, hence this logic does not appear earlier.
+                while ($tokens->[$prototypeStart] eq ',')
+                    {
+                    $prototypeStart++;
+                    $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine);
+
+                    $hasComma = 1;
+                    }
+
+                if ($tokens->[$prototypeStart] eq ')')
+                    {
+                    $prototypeStart++;
+                    last;
+                    }
+                elsif (!$hasComma)
+                    {  return undef;  };
+                };
+
+
+            # Now find the end of the prototype.
+
+            $prototypeEnd = $prototypeStart;
+            $prototypeEndLine = $prototypeStartLine;
+
+            while ($prototypeEnd < scalar @$tokens &&
+                     $tokens->[$prototypeEnd] !~ /^[\;\=]/)
+                {
+                $prototypeEnd++;
+                };
+
+
+            my $prototypePrefix = $firstToken . ' ';
+            if (defined $type)
+                {  $prototypePrefix .= $type . ' ';  };
+
+            my $prototypeSuffix = ' ' . $self->CreateString($prototypeStart, $prototypeEnd);
+
+            foreach $name (@names)
+                {
+                my $prototype = $self->NormalizePrototype( $prototypePrefix . $name . $prototypeSuffix );
+
+                $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_VARIABLE(), $name,
+                                                                                                           $self->CurrentScope(), undef,
+                                                                                                           $prototype,
+                                                                                                           undef, undef, $prototypeStartLine));
+                };
+
+            $self->SkipRestOfStatement(\$prototypeEnd, \$prototypeEndLine);
+
+            $$indexRef = $prototypeEnd;
+            $$lineNumberRef = $prototypeEndLine;
+            }
+
+        else # no parenthesis
+            {
+            my $name = $self->TryToGetVariableName(\$prototypeEnd, \$prototypeEndLine);
+
+            if (!defined $name)
+                {  return undef;  };
+
+            while ($prototypeEnd < scalar @$tokens &&
+                     $tokens->[$prototypeEnd] !~ /^[\;\=]/)
+                {
+                $prototypeEnd++;
+                };
+
+            my $prototype = $self->NormalizePrototype( $self->CreateString($prototypeStart, $prototypeEnd) );
+
+            $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_VARIABLE(), $name,
+                                                                                                       $self->CurrentScope(), undef,
+                                                                                                       $prototype,
+                                                                                                       undef, undef, $prototypeStartLine));
+
+            $self->SkipRestOfStatement(\$prototypeEnd, \$prototypeEndLine);
+
+            $$indexRef = $prototypeEnd;
+            $$lineNumberRef = $prototypeEndLine;
+            };
+
+        return 1;
+        }
+    else
+        {  return undef;  };
+    };
+
+
+#
+#   Function: TryToGetVariableName
+#
+#   Determines if the position is at a variable name, and if so, skips it and returns the name.
+#
+sub TryToGetVariableName #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    my $name;
+
+    if ($tokens->[$$indexRef] =~ /^[\$\@\%\*]/)
+        {
+        $name .= $tokens->[$$indexRef];
+        $$indexRef++;
+
+        $self->TryToSkipWhitespace($indexRef, $lineNumberRef);
+
+        if ($tokens->[$$indexRef] =~ /^[a-z_]/i)
+            {
+            $name .= $tokens->[$$indexRef];
+            $$indexRef++;
+            }
+        else
+            {  return undef;  };
+        };
+
+    return $name;
+    };
+
+
+#
+#   Function: TryToGetListOfStrings
+#
+#   Attempts to retrieve a list of strings from the current position.  Returns an arrayref of them if any are found, or undef if none.
+#   It stops the moment it reaches a non-string, so "string1, variable, string2" will only return string1.
+#
+#   Supported Syntaxes:
+#
+#   - Supports parenthesis.
+#   - Supports all string forms supported by <TryToSkipString()>.
+#   - Supports qw() string arrays.
+#
+sub TryToGetListOfStrings #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    my $parenthesis = 0;
+    my $strings;
+
+    while ($$indexRef < scalar @$tokens)
+        {
+        # We'll tolerate parenthesis.
+        if ($tokens->[$$indexRef] eq '(')
+            {
+            $$indexRef++;
+            $parenthesis++;
+            }
+        elsif ($tokens->[$$indexRef] eq ')')
+            {
+            if ($parenthesis == 0)
+                {  last;  };
+
+            $$indexRef++;
+            $parenthesis--;
+            }
+        elsif ($tokens->[$$indexRef] eq ',')
+            {
+            $$indexRef++;
+            }
+        else
+            {
+            my ($startContent, $endContent);
+            my $symbolIndex = $$indexRef;
+
+            if ($self->TryToSkipString($indexRef, $lineNumberRef, \$startContent, \$endContent))
+                {
+                my $content = $self->CreateString($startContent, $endContent);
+
+                if (!defined $strings)
+                    {  $strings = [ ];  };
+
+                if (lc($tokens->[$symbolIndex]) eq 'qw')
+                    {
+                    $content =~ tr/ \t\n/ /s;
+                    $content =~ s/^ //;
+
+                    my @qwStrings = split(/ /, $content);
+
+                    push @$strings, @qwStrings;
+                    }
+                else
+                    {
+                    push @$strings, $content;
+                    };
+                }
+            else
+                {  last;  };
+            };
+
+        $self->TryToSkipWhitespace($indexRef, $lineNumberRef);
+        };
+
+    return $strings;
+    };
+
+
+###############################################################################
+# Group: Low Level Parsing Functions
+
+
+#
+#   Function: GenericSkip
+#
+#   Advances the position one place through general code.
+#
+#   - If the position is on a comment or string, it will skip it completely.
+#   - If the position is on an opening symbol, it will skip until the past the closing symbol.
+#   - If the position is on a regexp or quote-like operator, it will skip it completely.
+#   - If the position is on a backslash, it will skip it and the following token.
+#   - If the position is on whitespace (including comments), it will skip it completely.
+#   - Otherwise it skips one token.
+#
+#   Parameters:
+#
+#       indexRef - A reference to the current index.
+#       lineNumberRef - A reference to the current line number.
+#       noRegExps - If set, does not test for regular expressions.
+#
+sub GenericSkip #(indexRef, lineNumberRef, noRegExps)
+    {
+    my ($self, $indexRef, $lineNumberRef, $noRegExps, $allowStringedClosingParens) = @_;
+    my $tokens = $self->Tokens();
+
+    if ($tokens->[$$indexRef] eq "\\" && $$indexRef + 1 < scalar @$tokens && $tokens->[$$indexRef+1] ne "\n")
+        {  $$indexRef += 2;  }
+
+    # Note that we don't want to count backslashed ()[]{} since they could be in regexps.  Also, ()[] are valid variable names
+    # when preceded by a string.
+
+    # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway.
+    elsif ($tokens->[$$indexRef] eq '{' && !$self->IsBackslashed($$indexRef))
+        {
+        $$indexRef++;
+        $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}', $noRegExps, $allowStringedClosingParens);
+        }
+    elsif ($tokens->[$$indexRef] eq '(' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef))
+        {
+        # Temporarily allow stringed closing parenthesis if it looks like we're in an anonymous function declaration with Perl's
+        # cheap version of prototypes, such as "my $_declare = sub($) {}".
+        my $tempAllowStringedClosingParens = $allowStringedClosingParens;
+        if (!$allowStringedClosingParens)
+        	{
+        	my $tempIndex = $$indexRef - 1;
+        	if ($tempIndex >= 0 && $tokens->[$tempIndex] =~ /^[ \t]/)
+        		{  $tempIndex--;  }
+        	if ($tempIndex >= 0 && $tokens->[$tempIndex] eq 'sub')
+        		{  $tempAllowStringedClosingParens = 1;  }
+        	}
+
+        $$indexRef++;
+
+        do
+            {  $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ')', $noRegExps, $tempAllowStringedClosingParens);  }
+        while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1) && !$tempAllowStringedClosingParens);
+        }
+    elsif ($tokens->[$$indexRef] eq '[' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef))
+        {
+        $$indexRef++;
+
+        do
+            {  $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ']', $noRegExps, $allowStringedClosingParens);  }
+        while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1));
+        }
+
+    elsif ($self->TryToSkipWhitespace($indexRef, $lineNumberRef) ||
+            $self->TryToSkipString($indexRef, $lineNumberRef) ||
+            $self->TryToSkipHereDocDeclaration($indexRef, $lineNumberRef) ||
+            (!$noRegExps && $self->TryToSkipRegexp($indexRef, $lineNumberRef) ) )
+        {
+        }
+
+    else
+        {  $$indexRef++;  };
+    };
+
+
+#
+#   Function: GenericSkipUntilAfter
+#
+#   Advances the position via <GenericSkip()> until a specific token is reached and passed.
+#
+sub GenericSkipUntilAfter #(indexRef, lineNumberRef, token, noRegExps, allowStringedClosingParens)
+    {
+    my ($self, $indexRef, $lineNumberRef, $token, $noRegExps, $allowStringedClosingParens) = @_;
+    my $tokens = $self->Tokens();
+
+    while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token)
+        {  $self->GenericSkip($indexRef, $lineNumberRef, $noRegExps, $allowStringedClosingParens);  };
+
+    if ($tokens->[$$indexRef] eq "\n")
+        {  $$lineNumberRef++;  };
+    $$indexRef++;
+    };
+
+
+#
+#   Function: GenericRegexpSkip
+#
+#   Advances the position one place through regexp code.
+#
+#   - If the position is on an opening symbol, it will skip until the past the closing symbol.
+#   - If the position is on a backslash, it will skip it and the following token.
+#   - If the position is on whitespace (not including comments), it will skip it completely.
+#   - Otherwise it skips one token.
+#
+#   Also differs from <GenericSkip()> in that the parenthesis in $( and $) do count against the scope, where they wouldn't
+#   normally.
+#
+#   Parameters:
+#
+#       indexRef - A reference to the current index.
+#       lineNumberRef - A reference to the current line number.
+#       inBrackets - Whether we're in brackets or not.  If true, we don't care about matching braces and parenthesis.
+#
+sub GenericRegexpSkip #(indexRef, lineNumberRef, inBrackets)
+    {
+    my ($self, $indexRef, $lineNumberRef, $inBrackets) = @_;
+    my $tokens = $self->Tokens();
+
+    if ($tokens->[$$indexRef] eq "\\" && $$indexRef + 1 < scalar @$tokens && $tokens->[$$indexRef+1] ne "\n")
+        {  $$indexRef += 2;  }
+
+    # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway.
+    elsif ($tokens->[$$indexRef] eq '{' && !$self->IsBackslashed($$indexRef) && !$inBrackets)
+        {
+        $$indexRef++;
+        $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, '}');
+        }
+    elsif ($tokens->[$$indexRef] eq '(' && !$self->IsBackslashed($$indexRef) && !$inBrackets)
+        {
+        $$indexRef++;
+        $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, ')');
+        }
+    elsif ($tokens->[$$indexRef] eq '[' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef))
+        {
+        $$indexRef++;
+
+        do
+            {  $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, ']');  }
+        while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1));
+        }
+
+    elsif ($tokens->[$$indexRef] eq "\n")
+        {
+        $$lineNumberRef++;
+        $$indexRef++;
+        }
+
+    else
+        {  $$indexRef++;  };
+    };
+
+
+#
+#   Function: GenericRegexpSkipUntilAfter
+#
+#   Advances the position via <GenericRegexpSkip()> until a specific token is reached and passed.
+#
+sub GenericRegexpSkipUntilAfter #(indexRef, lineNumberRef, token)
+    {
+    my ($self, $indexRef, $lineNumberRef, $token) = @_;
+    my $tokens = $self->Tokens();
+
+    my $inBrackets = ( $token eq ']' );
+
+    while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token)
+        {  $self->GenericRegexpSkip($indexRef, $lineNumberRef, $inBrackets);  };
+
+    if ($tokens->[$$indexRef] eq "\n")
+        {  $$lineNumberRef++;  };
+    $$indexRef++;
+    };
+
+
+#
+#   Function: SkipRestOfStatement
+#
+#   Advances the position via <GenericSkip()> until after the end of the current statement, which is defined as a semicolon or
+#   a brace group.  Of course, either of those appearing inside parenthesis, a nested brace group, etc. don't count.
+#
+sub SkipRestOfStatement #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    while ($$indexRef < scalar @$tokens &&
+             $tokens->[$$indexRef] ne ';' &&
+             !($tokens->[$$indexRef] eq '{' && !$self->IsStringed($$indexRef)) )
+        {
+        $self->GenericSkip($indexRef, $lineNumberRef);
+        };
+
+    if ($tokens->[$$indexRef] eq ';')
+        {  $$indexRef++;  }
+    elsif ($tokens->[$$indexRef] eq '{')
+        {  $self->GenericSkip($indexRef, $lineNumberRef);  };
+    };
+
+
+#
+#   Function: TryToSkipWhitespace
+#
+#   If the current position is on whitespace it skips them and returns true.  If there are a number of these in a row, it skips them
+#   all.
+#
+#   Supported Syntax:
+#
+#       - Whitespace
+#       - Line break
+#       - All comment forms supported by <TryToSkipComment()>
+#       - Here Doc content
+#
+sub TryToSkipWhitespace #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    my $result;
+
+    while ($$indexRef < scalar @$tokens)
+        {
+        if ($self->TryToSkipHereDocContent($indexRef, $lineNumberRef) ||
+            $self->TryToSkipComment($indexRef, $lineNumberRef))
+            {
+            $result = 1;
+            }
+        elsif ($tokens->[$$indexRef] =~ /^[ \t]/)
+            {
+            $$indexRef++;
+            $result = 1;
+            }
+        elsif ($tokens->[$$indexRef] eq "\n")
+            {
+            $$indexRef++;
+            $$lineNumberRef++;
+            $result = 1;
+            }
+        else
+            {  last;  };
+        };
+
+    return $result;
+    };
+
+
+#
+#   Function: TryToSkipComment
+#   If the current position is on a comment, skip past it and return true.
+#
+sub TryToSkipComment #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+
+    return ( $self->TryToSkipLineComment($indexRef, $lineNumberRef) ||
+                $self->TryToSkipPODComment($indexRef, $lineNumberRef) );
+    };
+
+
+#
+#   Function: TryToSkipLineComment
+#   If the current position is on a line comment symbol, skip past it and return true.
+#
+sub TryToSkipLineComment #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    # Note that $#var is not a comment.
+    if ($tokens->[$$indexRef] eq '#' && !$self->IsStringed($$indexRef))
+        {
+        $self->SkipRestOfLine($indexRef, $lineNumberRef);
+        return 1;
+        }
+    else
+        {  return undef;  };
+    };
+
+
+#
+#   Function: TryToSkipPODComment
+#   If the current position is on a POD comment symbol, skip past it and return true.
+#
+sub TryToSkipPODComment #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    # Note that whitespace is not allowed before the equals sign.  It must directly start a line.
+    if ($tokens->[$$indexRef] eq '=' &&
+        ( $$indexRef == 0 || $tokens->[$$indexRef - 1] eq "\n" ) &&
+        $tokens->[$$indexRef + 1] =~ /^[a-z]/i )
+        {
+        # Skip until =cut or (NDPODBREAK).  Note that it's theoretically possible for =cut to appear without a prior POD directive.
+
+        do
+            {
+            if ($tokens->[$$indexRef] eq '=' && lc( $tokens->[$$indexRef + 1] ) eq 'cut')
+                {
+                $self->SkipRestOfLine($indexRef, $lineNumberRef);
+                last;
+                }
+            elsif ($tokens->[$$indexRef] eq '(' && $$indexRef + 2 < scalar @$tokens &&
+                    $tokens->[$$indexRef+1] eq 'NDPODBREAK' && $tokens->[$$indexRef+2] eq ')')
+                {
+                $$indexRef += 3;
+                last;
+                }
+            else
+                {
+                $self->SkipRestOfLine($indexRef, $lineNumberRef);
+                };
+            }
+        while ($$indexRef < scalar @$tokens);
+
+        return 1;
+        }
+
+    # It's also possible that (NDPODBREAK) will appear without any opening pod statement because "=begin nd" and "=cut" will
+    # still result in one.  We need to pick off the stray (NDPODBREAK).
+    elsif ($tokens->[$$indexRef] eq '(' && $$indexRef + 2 < scalar @$tokens &&
+            $tokens->[$$indexRef+1] eq 'NDPODBREAK' && $tokens->[$$indexRef+2] eq ')')
+        {
+        $$indexRef += 3;
+        return 1;
+        }
+
+    else
+        {  return undef;  };
+    };
+
+
+#
+#   Function: TryToSkipString
+#   If the current position is on a string delimiter, skip past the string and return true.
+#
+#   Parameters:
+#
+#       indexRef - A reference to the index of the position to start at.
+#       lineNumberRef - A reference to the line number of the position.
+#       startContentIndexRef - A reference to the variable in which to store the index of the first content token.  May be undef.
+#       endContentIndexRef - A reference to the variable in which to store the index of the end of the content, which is one past
+#                                        the last content token.  may be undef.
+#
+#   Returns:
+#
+#       Whether the position was at a string.  The index, line number, and content index variabls will only be changed if true.
+#
+#   Syntax Support:
+#
+#       - Supports quotes, apostrophes, backticks, q(), qq(), qx(), and qw().
+#       - All symbols are supported for the letter forms.
+#
+sub TryToSkipString #(indexRef, lineNumberRef, startContentIndexRef, endContentIndexRef)
+    {
+    my ($self, $indexRef, $lineNumberRef, $startContentIndexRef, $endContentIndexRef) = @_;
+    my $tokens = $self->Tokens();
+
+    # The three string delimiters.  All three are Perl variables when preceded by a dollar sign.
+    if (!$self->IsStringed($$indexRef) &&
+        ( $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '\'', '\'', $startContentIndexRef, $endContentIndexRef) ||
+          $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '"', '"', $startContentIndexRef, $endContentIndexRef) ||
+          $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '`', '`', $startContentIndexRef, $endContentIndexRef) ) )
+        {
+        return 1;
+        }
+    elsif ($tokens->[$$indexRef] =~ /^(?:q|qq|qx|qw)$/i &&
+            ($$indexRef == 0 || $tokens->[$$indexRef - 1] !~ /^[\$\%\@\*]$/))
+        {
+        $$indexRef++;
+
+        $self->TryToSkipWhitespace($indexRef, $lineNumberRef);
+
+        my $openingSymbol = $tokens->[$$indexRef];
+        my $closingSymbol;
+
+        if ($openingSymbol eq '{')
+            {  $closingSymbol = '}';  }
+        elsif ($openingSymbol eq '(')
+            {  $closingSymbol = ')';  }
+        elsif ($openingSymbol eq '[')
+            {  $closingSymbol = ']';  }
+        elsif ($openingSymbol eq '<')
+            {  $closingSymbol = '>';  }
+        else
+            {  $closingSymbol = $openingSymbol;  };
+
+        $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, $openingSymbol, $closingSymbol,
+                                                      $startContentIndexRef, $endContentIndexRef);
+
+        return 1;
+        }
+    else
+        {  return undef;  };
+    };
+
+
+#
+#   Function: TryToSkipHereDocDeclaration
+#
+#   If the current position is on a Here Doc declaration, add its terminators to <hereDocTerminators> and skip it.
+#
+#   Syntax Support:
+#
+#       - Supports <<EOF
+#       - Supports << "String" with all string forms supported by <TryToSkipString()>.
+#
+sub TryToSkipHereDocDeclaration #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    my $index = $$indexRef;
+    my $lineNumber = $$lineNumberRef;
+
+    if ($tokens->[$index] eq '<' && $tokens->[$index + 1] eq '<')
+        {
+        $index += 2;
+        my $success;
+
+        # No whitespace allowed with the bare word.
+        if ($tokens->[$index] =~ /^[a-z0-9_]/i)
+            {
+            push @hereDocTerminators, [ $tokens->[$index] ];
+            $index++;
+            $success = 1;
+            }
+        else
+            {
+            $self->TryToSkipWhitespace(\$index, \$lineNumber);
+
+            my ($contentStart, $contentEnd);
+            if ($self->TryToSkipString(\$index, \$lineNumber, \$contentStart, \$contentEnd))
+                {
+                push @hereDocTerminators, [ @{$tokens}[$contentStart..$contentEnd - 1] ];
+                $success = 1;
+                };
+            };
+
+        if ($success)
+            {
+            $$indexRef = $index;
+            $$lineNumberRef = $lineNumber;
+
+            return 1;
+            };
+        };
+
+    return 0;
+    };
+
+
+#
+#   Function: TryToSkipHereDocContent
+#
+#   If the current position is at the beginning of a line and there are entries in <hereDocTerminators>, skips lines until all the
+#   terminators are exhausted or we reach the end of the file.
+#
+#   Returns:
+#
+#       Whether the position was on Here Doc content.
+#
+sub TryToSkipHereDocContent #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    # We don't use IsFirstLineToken() because it really needs to be the first line token.  Whitespace is not allowed.
+    if ($$indexRef > 0 && $tokens->[$$indexRef - 1] eq "\n")
+        {
+        my $success = (scalar @hereDocTerminators > 0);
+
+        while (scalar @hereDocTerminators && $$indexRef < scalar @$tokens)
+            {
+            my $terminatorIndex = 0;
+
+            while ($hereDocTerminators[0]->[$terminatorIndex] eq $tokens->[$$indexRef])
+                {
+                $terminatorIndex++;
+                $$indexRef++;
+                };
+
+            if ($terminatorIndex == scalar @{$hereDocTerminators[0]} &&
+                ($tokens->[$$indexRef] eq "\n" || ($tokens->[$$indexRef] =~ /^[ \t]/ && $tokens->[$$indexRef + 1] eq "\n")) )
+                {
+                shift @hereDocTerminators;
+                $$indexRef++;
+                $$lineNumberRef++;
+                }
+            else
+                {  $self->SkipRestOfLine($indexRef, $lineNumberRef);  };
+            };
+
+        return $success;
+        }
+
+    else
+        {  return 0;  };
+    };
+
+
+#
+#   Function: TryToSkipRegexp
+#   If the current position is on a regular expression or a quote-like operator, skip past it and return true.
+#
+#   Syntax Support:
+#
+#       - Supports //, ??, m//, qr//, s///, tr///, and y///.
+#       - All symbols are supported for the letter forms.
+#       - ?? is *not* supported because it could cause problems with ?: statements.  The generic parser has a good chance of
+#         successfully stumbling through a regex, whereas the regex code will almost certainly see the rest of the file as part of it.
+#
+sub TryToSkipRegexp #(indexRef, lineNumberRef)
+    {
+    my ($self, $indexRef, $lineNumberRef) = @_;
+    my $tokens = $self->Tokens();
+
+    my $isRegexp;
+
+    # If it's a supported character sequence that's not a variable (ex $qr)...
+    if ($tokens->[$$indexRef] =~ /^(?:m|qr|s|tr|y)$/i &&
+         ($$indexRef == 0 || $tokens->[$$indexRef - 1] !~ /^[\$\%\@\*\-]$/) )
+        {  $isRegexp = 1;  }
+
+    elsif ($tokens->[$$indexRef] eq '/' && !$self->IsStringed($$indexRef))
+        {
+        # This is a bit of a hack.  If we find a random slash, it could be a divide operator or a bare regexp.  Find the first previous
+        # non-whitespace token and if it's text, a closing brace, or a string, assume it's a divide operator.  (Strings don't make
+        # much pratical sense there but a regexp would be impossible.)  Otherwise assume it's a regexp.
+
+        # We make a special consideration for split() appearing without parenthesis.  If the previous token is split and it's not a
+        # variable, assume it is a regexp even though it fails the above test.
+
+        my $index = $$indexRef - 1;
+
+        while ($index >= 0 && $tokens->[$index] =~ /^(?: |\t|\n)/)
+            {  $index--;  };
+
+        if ($index < 0 || $tokens->[$index] !~ /^[a-zA-Z0-9_\)\]\}\'\"\`]/ ||
+            ($tokens->[$index] =~ /^split|grep$/ && $index > 0 && $tokens->[$index-1] !~ /^[\$\%\@\*]$/) )
+            {  $isRegexp = 1;  };
+        };
+
+    if ($isRegexp)
+        {
+        my $operator = lc($tokens->[$$indexRef]);
+        my $index = $$indexRef;
+        my $lineNumber = $$lineNumberRef;
+
+        if ($operator =~ /^[\?\/]/)
+            {  $operator = 'm';  }
+        else
+            {
+            $index++;
+
+            # Believe it or not, s#...# is allowed.  We can't pass over number signs here.
+            if ($tokens->[$index] ne '#')
+                {  $self->TryToSkipWhitespace(\$index, \$lineNumber);  };
+            };
+
+        if ($tokens->[$index] =~ /^\w/)
+            {  return undef;  };
+        if ($tokens->[$index] eq '=' && $tokens->[$index+1] eq '>')
+        	{  return undef;  };
+
+        my $openingSymbol = $tokens->[$index];
+        my $closingSymbol;
+
+        if ($openingSymbol eq '{')
+            {  $closingSymbol = '}';  }
+        elsif ($openingSymbol eq '(')
+            {  $closingSymbol = ')';  }
+        elsif ($openingSymbol eq '[')
+            {  $closingSymbol = ']';  }
+        elsif ($openingSymbol eq '<')
+            {  $closingSymbol = '>';  }
+        else
+            {  $closingSymbol = $openingSymbol;  };
+
+        $index++;
+
+        $self->GenericRegexpSkipUntilAfter(\$index, \$lineNumber, $closingSymbol);
+
+        $$indexRef = $index;
+        $$lineNumberRef = $lineNumber;
+
+        if ($operator =~ /^(?:s|tr|y)$/)
+            {
+            if ($openingSymbol ne $closingSymbol)
+                {
+                $self->TryToSkipWhitespace($indexRef, $lineNumberRef);
+
+                $openingSymbol = $tokens->[$index];
+
+                if ($openingSymbol eq '{')
+                    {  $closingSymbol = '}';  }
+                elsif ($openingSymbol eq '(')
+                    {  $closingSymbol = ')';  }
+                elsif ($openingSymbol eq '[')
+                    {  $closingSymbol = ']';  }
+                elsif ($openingSymbol eq '<')
+                    {  $closingSymbol = '>';  }
+                else
+                    {  $closingSymbol = $openingSymbol;  };
+
+                $$indexRef++;
+                };
+
+            if ($operator eq 's')
+                {
+                $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, $closingSymbol, 1);
+                }
+            else # ($operator eq 'tr' || $operator eq 'y')
+                {
+                while ($$indexRef < scalar @$tokens &&
+                          ($tokens->[$$indexRef] ne $closingSymbol || $self->IsBackslashed($$indexRef)) )
+                    {
+                    if ($tokens->[$$indexRef] eq "\n")
+                        {  $$lineNumberRef++;  };
+                    $$indexRef++;
+                    };
+
+                $$indexRef++;
+                };
+            };
+
+        # We want to skip any letters after the regexp.  Otherwise something like tr/a/b/s; could have the trailing s; interpreted
+        # as another regexp.  Whitespace is not allowed between the closing symbol and the letters.
+
+        if ($tokens->[$$indexRef] =~ /^[a-z]/i)
+            {  $$indexRef++;  };
+
+        return 1;
+        };
+
+    return undef;
+    };
+
+
+
+###############################################################################
+# Group: Support Functions
+
+
+#
+#   Function: IsStringed
+#
+#   Returns whether the position is after a string (dollar sign) character.  Returns false if it's preceded by two dollar signs so
+#   "if ($x == $$)" doesn't skip the closing parenthesis as stringed.
+#
+#   Parameters:
+#
+#       index - The index of the postition.
+#
+sub IsStringed #(index)
+    {
+    my ($self, $index) = @_;
+    my $tokens = $self->Tokens();
+
+    if ($index > 0 && $tokens->[$index - 1] eq '$' && !($index > 1 && $tokens->[$index - 2] eq '$'))
+        {  return 1;  }
+    else
+        {  return undef;  };
+    };
+
+
+1;
author	Magnus Auvinen <magnus.auvinen@gmail.com>	2008-08-02 08:21:29 +0000
committer	Magnus Auvinen <magnus.auvinen@gmail.com>	2008-08-02 08:21:29 +0000
commit	61bfe2d70cae6be8c4086a210a5451135ccca9ea (patch)
tree	62bf7808b1b2bfe5f56fe1e329871fb0991d0687 /docs/tool/Modules/NaturalDocs/Languages/Perl.pm
parent	a13b94f9e0bca8ea892311d9d9e0c0bc48616ea7 (diff)
download	zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.tar.gz zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.zip