added doc tool

author: Magnus Auvinen <magnus.auvinen@gmail.com> 2008-08-02 08:21:29 +0000
committer: Magnus Auvinen <magnus.auvinen@gmail.com> 2008-08-02 08:21:29 +0000
commit: 61bfe2d70cae6be8c4086a210a5451135ccca9ea (patch)
tree: 62bf7808b1b2bfe5f56fe1e329871fb0991d0687 /docs/tool/Modules/NaturalDocs/Languages/Simple.pm
parent: a13b94f9e0bca8ea892311d9d9e0c0bc48616ea7 (diff)
download: zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.tar.gz
zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.zip
1 files changed, 503 insertions, 0 deletions
diff --git a/docs/tool/Modules/NaturalDocs/Languages/Simple.pm b/docs/tool/Modules/NaturalDocs/Languages/Simple.pm
new file mode 100644
index 00000000..9d962b1c
--- /dev/null
+++ b/docs/tool/Modules/NaturalDocs/Languages/Simple.pm
@@ -0,0 +1,503 @@
+###############################################################################
+#
+#   Class: NaturalDocs::Languages::Simple
+#
+###############################################################################
+#
+#   A class containing the characteristics of a particular programming language for basic support within Natural Docs.
+#   Also serves as a base class for languages that break from general conventions, such as not having parameter lists use
+#   parentheses and commas.
+#
+###############################################################################
+
+# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
+# Natural Docs is licensed under the GPL
+
+use strict;
+use integer;
+
+package NaturalDocs::Languages::Simple;
+
+use base 'NaturalDocs::Languages::Base';
+use base 'Exporter';
+
+our @EXPORT = ( 'ENDER_ACCEPT', 'ENDER_IGNORE', 'ENDER_ACCEPT_AND_CONTINUE', 'ENDER_REVERT_TO_ACCEPTED' );
+
+
+use NaturalDocs::DefineMembers 'LINE_COMMENT_SYMBOLS', 'LineCommentSymbols()', 'SetLineCommentSymbols() duparrayref',
+                                                 'BLOCK_COMMENT_SYMBOLS', 'BlockCommentSymbols()',
+                                                                                              'SetBlockCommentSymbols() duparrayref',
+                                                 'PROTOTYPE_ENDERS',
+                                                 'LINE_EXTENDER', 'LineExtender()', 'SetLineExtender()',
+                                                 'PACKAGE_SEPARATOR', 'PackageSeparator()',
+                                                 'PACKAGE_SEPARATOR_WAS_SET', 'PackageSeparatorWasSet()',
+                                                 'ENUM_VALUES', 'EnumValues()',
+                                                 'ENUM_VALUES_WAS_SET', 'EnumValuesWasSet()';
+
+#
+#   Function: New
+#
+#   Creates and returns a new object.  It is built by the base class's New(), then given this class's defaults: enum values of
+#   ::ENUM_GLOBAL() and a package separator of '.'.
+#
+#   Parameters:
+#
+#       name - The name of the language.
+#
+sub New #(name)
+    {
+    my $class = shift;
+    my $languageName = shift;
+
+    my $self = $class->SUPER::New($languageName);
+    $self->[PACKAGE_SEPARATOR] = '.';
+    $self->[ENUM_VALUES] = ::ENUM_GLOBAL();
+    return $self;
+    };
+
+
+#
+#   Functions: Members
+#
+#   LineCommentSymbols - Returns an arrayref of symbols that start a line comment, or undef if none.
+#   SetLineCommentSymbols - Replaces the arrayref of symbols that start a line comment.
+#   BlockCommentSymbols - Returns an arrayref of start/end symbol pairs that specify a block comment, or undef if none.  Pairs
+#                                        are specified with two consecutive array entries.
+#   SetBlockCommentSymbols - Replaces the arrayref of start/end symbol pairs that specify a block comment.  Pairs are
+#                                             specified with two consecutive array entries.
+#   LineExtender - Returns the symbol to ignore a line break in languages where line breaks are significant.
+#   SetLineExtender - Replaces the symbol to ignore a line break in languages where line breaks are significant.
+#   PackageSeparator - Returns the package separator symbol.
+#   PackageSeparatorWasSet - Returns whether the package separator symbol was ever changed from the default.
+#
+
+#
+#   Function: SetPackageSeparator
+#   Replaces the language's package separator string and flags it as no longer being the default.
+#
+sub SetPackageSeparator #(separator)
+    {
+    my $self = shift;
+    $self->[PACKAGE_SEPARATOR] = shift;
+    $self->[PACKAGE_SEPARATOR_WAS_SET] = 1;
+    };
+
+
+#
+#   Functions: Members
+#
+#   EnumValues - Returns the <EnumValuesType> that describes how the language handles enums.
+#   EnumValuesWasSet - Returns whether <EnumValues> was ever changed from the default.
+
+
+#
+#   Function: SetEnumValues
+#   Replaces the <EnumValuesType> describing how the language handles enums and flags it as no longer being the default.
+#
+sub SetEnumValues #(EnumValuesType newBehavior)
+    {
+    my $self = $_[0];
+    $self->[ENUM_VALUES] = $_[1];
+    $self->[ENUM_VALUES_WAS_SET] = 1;
+    };
+
+
+#
+#   Function: PrototypeEndersFor
+#
+#   Returns an arrayref of prototype ender symbols for the passed <TopicType>.  Returns undef if there are none for that type
+#   or if no prototype enders have been set for any type.
+#
+sub PrototypeEndersFor #(type)
+    {
+    my ($self, $type) = @_;
+    my $endersByType = $self->[PROTOTYPE_ENDERS];
+
+    return undef if (!defined $endersByType);
+    return $endersByType->{$type};
+    };
+
+
+#
+#   Function: SetPrototypeEndersFor
+#
+#   Replaces the prototype ender symbols for the passed <TopicType>.  The passed arrayref's contents are copied rather than the
+#   reference being stored.  Passing undef instead of an arrayref removes the enders for that type.
+#
+sub SetPrototypeEndersFor #(type, enders)
+    {
+    my ($self, $type, $enders) = @_;
+
+    # The table of enders by type doesn't exist until the first time this is called.
+    $self->[PROTOTYPE_ENDERS] = { } unless (defined $self->[PROTOTYPE_ENDERS]);
+    my $endersByType = $self->[PROTOTYPE_ENDERS];
+
+    if (defined $enders)
+        {  $endersByType->{$type} = [ @$enders ];  }
+    else
+        {  delete $endersByType->{$type};  };
+    };
+
+
+
+
+###############################################################################
+# Group: Parsing Functions
+
+
+#
+#   Function: ParseFile
+#
+#   Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>
+#   and all other sections to <OnCode()>.  If the language is named 'Text File', the whole file is sent as one comment instead.
+#
+#   Parameters:
+#
+#       sourceFile - The <FileName> of the source file to parse.
+#       topicsList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
+#
+#   Returns:
+#
+#       Since this class cannot automatically document the code or generate a scope record, it always returns ( undef, undef ).
+#       Dies if the source file cannot be opened.
+#
+sub ParseFile #(sourceFile, topicsList)
+    {
+    my ($self, $sourceFile, $topicsList) = @_;
+
+    # Three argument open and a lexical handle: the name can't be read as an open mode and the handle isn't a global.
+    open(my $sourceFileHandle, '<', $sourceFile)
+        or die "Couldn't open input file " . $sourceFile . "\n";
+
+    my @commentLines;
+    my @codeLines;
+    my $lastCommentTopicCount = 0;
+
+    my $line = <$sourceFileHandle>;
+
+    # On the very first line, remove a Unicode BOM if present.  Information on it available at:
+    # http://www.unicode.org/faq/utf_bom.html#BOM
+    if (defined $line)
+        {  $line =~ s/^\xEF\xBB\xBF//;  };
+
+    if ($self->Name() eq 'Text File')
+        {
+        # Test with defined rather than truth so a final line of "0" with no line break after it isn't dropped.
+        while (defined $line)
+            {
+            ::XChomp(\$line);
+            push @commentLines, $line;
+            $line = <$sourceFileHandle>;
+            };
+
+        NaturalDocs::Parser->OnComment(\@commentLines, 1);
+        }
+
+    else
+        {
+        my $lineNumber = 1;
+
+        while (defined $line)
+            {
+            ::XChomp(\$line);
+
+
+            # Retrieve single line comments.  This leaves $line at the next line.
+
+            if ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols()))
+                {
+                for (;;)
+                    {
+                    push @commentLines, $line;
+                    $line = <$sourceFileHandle>;
+
+                    if (!defined $line)
+                        {  last;  };
+
+                    ::XChomp(\$line);
+
+                    # The comment ends at the first line that doesn't start with a line comment symbol.
+                    if (!$self->StripOpeningSymbols(\$line, $self->LineCommentSymbols()))
+                        {  last;  };
+                    };
+                }
+
+
+            # Retrieve multiline comments.  This leaves $line at the next line.
+
+            elsif (my $closingSymbol = $self->StripOpeningBlockSymbols(\$line, $self->BlockCommentSymbols()))
+                {
+                # Note that it is possible for a multiline comment to start correctly but not end so.  We want those comments to stay in
+                # the code.  For example, look at this prototype with this splint annotation:
+                #
+                # int get_array(integer_t id,
+                #                    /*@out@*/ array_t array);
+                #
+                # The annotation starts correctly but doesn't end so because it is followed by code on the same line.
+
+                my $lineRemainder;
+
+                for (;;)
+                    {
+                    $lineRemainder = $self->StripClosingSymbol(\$line, $closingSymbol);
+
+                    push @commentLines, $line;
+
+                    #  If we found an end comment symbol...
+                    if (defined $lineRemainder)
+                        {  last;  };
+
+                    $line = <$sourceFileHandle>;
+
+                    if (!defined $line)
+                        {  last;  };
+
+                    ::XChomp(\$line);
+                    };
+
+                # $lineRemainder is undefined if the file ended before the comment was closed.
+                if (defined $lineRemainder && $lineRemainder !~ /^[ \t]*$/)
+                    {
+                    # If there was something past the closing symbol this wasn't an acceptable comment, so move the lines to code.
+                    push @codeLines, @commentLines;
+                    @commentLines = ( );
+                    };
+
+                $line = <$sourceFileHandle>;
+                }
+
+
+            # Otherwise just add it to the code.
+
+            else
+                {
+                push @codeLines, $line;
+                $line = <$sourceFileHandle>;
+                };
+
+
+            # If there were comments, send them to Parser->OnComment().
+
+            if (scalar @commentLines)
+                {
+                # First process any code lines before the comment.
+                if (scalar @codeLines)
+                    {
+                    $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
+                    $lineNumber += scalar @codeLines;
+                    @codeLines = ( );
+                    };
+
+                $lastCommentTopicCount = NaturalDocs::Parser->OnComment(\@commentLines, $lineNumber);
+                $lineNumber += scalar @commentLines;
+                @commentLines = ( );
+                };
+
+            };  # while (defined $line)
+
+
+        # Clean up any remaining code.
+        if (scalar @codeLines)
+            {
+            $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
+            @codeLines = ( );
+            };
+
+        };
+
+    close($sourceFileHandle);
+
+    return ( undef, undef );
+    };
+
+
+#
+#   Function: OnCode
+#
+#   Called whenever a section of code is encountered by the parser.  Finds the prototype of the last topic created and sets it on it.
+#
+#   Parameters:
+#
+#       codeLines - The source code as an arrayref of lines.
+#       codeLineNumber - The line number of the first line of code.  Not used by this implementation.
+#       topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.  Only the last is used.
+#       lastCommentTopicCount - The number of Natural Docs topics that were created by the last comment.  If zero, nothing is done.
+#
+sub OnCode #(codeLines, codeLineNumber, topicList, lastCommentTopicCount)
+    {
+    my ($self, $codeLines, $codeLineNumber, $topicList, $lastCommentTopicCount) = @_;
+    # Only look for a prototype if the last comment created topics and the last topic's type has prototype enders.
+    if ($lastCommentTopicCount && defined $self->PrototypeEndersFor($topicList->[-1]->Type()))
+        {
+        my $lineIndex = 0;
+        my $prototype;
+
+        # Skip all blank lines before a prototype.
+        while ($lineIndex < scalar @$codeLines && $codeLines->[$lineIndex] =~ /^[ \t]*$/)
+            {  $lineIndex++;  };
+
+        my @tokens;
+        my $tokenIndex = 0;
+        # The opening bracket symbols we're currently nested in, innermost last.
+        my @brackets;
+        my $enders = $self->PrototypeEndersFor($topicList->[-1]->Type());
+
+        # Add prototype lines until we reach the end of the prototype or the end of the code lines.
+        while ($lineIndex < scalar @$codeLines)
+            {
+            my $line = $self->RemoveLineExtender($codeLines->[$lineIndex] . "\n");
+            # Each bracket symbol becomes its own token.  Runs of anything else stay together as one token.
+            push @tokens, $line =~ /([^\(\)\[\]\{\}\<\>]+|.)/g;
+
+            while ($tokenIndex < scalar @tokens)
+                {
+                # If we're not inside brackets, check for ender symbols.
+                if (!scalar @brackets)
+                    {
+                    my $startingIndex = 0;
+                    my $testPrototype;
+                    # Go through the ender symbols in this token.  ::FindFirstSymbol() is expected to give an index of -1 when none remain.
+                    for (;;)
+                        {
+                        my ($enderIndex, $ender) = ::FindFirstSymbol($tokens[$tokenIndex], $enders, $startingIndex);
+
+                        if ($enderIndex == -1)
+                            {  last;  }
+                        else
+                            {
+                            # Only copy $prototype the first time an ender symbol is found in a token, so it isn't duplicated for
+                            # every single token whether it has enders or not.
+                            if (!defined $testPrototype)
+                                {  $testPrototype = $prototype;  };
+
+                            $testPrototype .= substr($tokens[$tokenIndex], $startingIndex, $enderIndex - $startingIndex);
+
+                            my $enderResult;
+
+                            # If the ender is a word and touches a letter, digit, or underscore, it's only part of a longer word.  Ignore it.
+                            if ($ender =~ /^[a-z0-9]+$/i &&
+                                ( ($enderIndex > 0 && substr($tokens[$tokenIndex], $enderIndex - 1, 1) =~ /^[a-z0-9_]$/i) ||
+                                   substr($tokens[$tokenIndex], $enderIndex + length($ender), 1) =~ /^[a-z0-9_]$/i ) )
+                                {  $enderResult = ENDER_IGNORE();  }
+                            else
+                                {  $enderResult = $self->OnPrototypeEnd($topicList->[-1]->Type(), \$testPrototype, $ender);  }
+
+                            if ($enderResult == ENDER_IGNORE())
+                                {
+                                $testPrototype .= $ender;
+                                $startingIndex = $enderIndex + length($ender);
+                                }
+                            elsif ($enderResult == ENDER_REVERT_TO_ACCEPTED())
+                                {
+                                return;
+                                }
+                            else # ENDER_ACCEPT || ENDER_ACCEPT_AND_CONTINUE
+                                {
+                                my $titleInPrototype = $topicList->[-1]->Title();
+
+                                # Strip parentheses so Function(2) and Function(int, int) will still match Function(anything).
+                                $titleInPrototype =~ s/[\t ]*\([^\(]*$//;
+                                # Only use the prototype if it contains the topic's title.
+                                if (index($testPrototype, $titleInPrototype) != -1)
+                                    {
+                                    $topicList->[-1]->SetPrototype( $self->NormalizePrototype($testPrototype) );
+                                    };
+
+                                if ($enderResult == ENDER_ACCEPT())
+                                    {  return;  }
+                                else # ENDER_ACCEPT_AND_CONTINUE
+                                    {
+                                    $testPrototype .= $ender;
+                                    $startingIndex = $enderIndex + length($ender);
+                                    };
+                                };
+                            };
+                        };
+                    }
+
+                # If we are inside brackets, check for the closing symbol that matches the innermost opening one.
+                elsif ( ($tokens[$tokenIndex] eq ')' && $brackets[-1] eq '(') ||
+                         ($tokens[$tokenIndex] eq ']' && $brackets[-1] eq '[') ||
+                         ($tokens[$tokenIndex] eq '}' && $brackets[-1] eq '{') ||
+                         ($tokens[$tokenIndex] eq '>' && $brackets[-1] eq '<') )
+                    {
+                    pop @brackets;
+                    };
+
+                # Check for opening brackets.  This is done whether or not we were already inside brackets.
+                if ($tokens[$tokenIndex] =~ /^[\(\[\{\<]$/)
+                    {
+                    push @brackets, $tokens[$tokenIndex];
+                    };
+
+                $prototype .= $tokens[$tokenIndex];
+                $tokenIndex++;
+                };
+
+            $lineIndex++;
+            };
+        # If we got out of that while loop by running out of lines, no ender ended the prototype.  Nothing more is set here, so the
+        # topic only has a prototype from this code if one was set for an ENDER_ACCEPT_AND_CONTINUE result along the way.
+        };
+    };
+
+# The values OnPrototypeEnd() can return.  Their meanings are listed in that function's documentation.
+use constant ENDER_ACCEPT => 1;
+use constant ENDER_IGNORE => 2;
+use constant ENDER_ACCEPT_AND_CONTINUE => 3;
+use constant ENDER_REVERT_TO_ACCEPTED => 4;
+
+#
+#   Function: OnPrototypeEnd
+#
+#   Called whenever the end of a prototype is found so that there's a chance for derived classes to mark false positives.
+#   This implementation accepts every ender regardless of its parameters.
+#
+#   Parameters:
+#
+#       type - The <TopicType> of the prototype.
+#       prototypeRef - A reference to the prototype so far, minus the ender in dispute.
+#       ender - The ender symbol.
+#
+#   Returns:
+#
+#       ENDER_ACCEPT - The ender is accepted and the prototype is finished.
+#       ENDER_IGNORE - The ender is rejected and parsing should continue.  Note that the prototype will be rejected as a whole
+#                                  if all enders are ignored before reaching the end of the code.
+#       ENDER_ACCEPT_AND_CONTINUE - The ender is accepted so the prototype may stand as is, but it might also continue on so
+#                                                          parsing continues.  This version is used if no later ender is accepted.
+#       ENDER_REVERT_TO_ACCEPTED - The attempt to continue past an ENDER_ACCEPT_AND_CONTINUE failed.  Use that version and stop.
+#
+sub OnPrototypeEnd #(type, prototypeRef, ender)
+    {
+    my ($self, $type, $prototypeRef, $ender) = @_;
+    return ENDER_ACCEPT();
+    };
+
+
+#
+#   Function: RemoveLineExtender
+#
+#   Returns the passed line with a trailing line extender and the line break after it replaced by a single space.  The extender
+#   only counts if nothing but spaces and tabs sit between it and the line break.  If the line doesn't have one, or no line
+#   extender is defined for the language, the line is returned unchanged.
+#
+sub RemoveLineExtender #(line)
+    {
+    my ($self, $line) = @_;
+    my $extender = $self->LineExtender();
+
+    if (!defined $extender)
+        {  return $line;  };
+
+    # Only the last occurrence can be the extender, and only if nothing but whitespace separates it from the line break.
+    my $extenderIndex = rindex($line, $extender);
+
+    if ($extenderIndex == -1 || substr($line, $extenderIndex + length($extender)) !~ /^[ \t]*\n$/)
+        {  return $line;  };
+
+    return substr($line, 0, $extenderIndex) . ' ';
+    };
+
+
+1;
author	Magnus Auvinen <magnus.auvinen@gmail.com>	2008-08-02 08:21:29 +0000
committer	Magnus Auvinen <magnus.auvinen@gmail.com>	2008-08-02 08:21:29 +0000
commit	61bfe2d70cae6be8c4086a210a5451135ccca9ea (patch)
tree	62bf7808b1b2bfe5f56fe1e329871fb0991d0687 /docs/tool/Modules/NaturalDocs/Languages/Simple.pm
parent	a13b94f9e0bca8ea892311d9d9e0c0bc48616ea7 (diff)
download	zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.tar.gz zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.zip