diff options
| author | Magnus Auvinen <magnus.auvinen@gmail.com> | 2008-08-02 08:21:29 +0000 |
|---|---|---|
| committer | Magnus Auvinen <magnus.auvinen@gmail.com> | 2008-08-02 08:21:29 +0000 |
| commit | 61bfe2d70cae6be8c4086a210a5451135ccca9ea (patch) | |
| tree | 62bf7808b1b2bfe5f56fe1e329871fb0991d0687 /docs/tool/Modules/NaturalDocs/Languages/Simple.pm | |
| parent | a13b94f9e0bca8ea892311d9d9e0c0bc48616ea7 (diff) | |
| download | zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.tar.gz zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.zip | |
added doc tool
Diffstat (limited to 'docs/tool/Modules/NaturalDocs/Languages/Simple.pm')
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/Simple.pm | 503 |
1 files changed, 503 insertions, 0 deletions
diff --git a/docs/tool/Modules/NaturalDocs/Languages/Simple.pm b/docs/tool/Modules/NaturalDocs/Languages/Simple.pm new file mode 100644 index 00000000..9d962b1c --- /dev/null +++ b/docs/tool/Modules/NaturalDocs/Languages/Simple.pm @@ -0,0 +1,503 @@ +############################################################################### +# +# Class: NaturalDocs::Languages::Simple +# +############################################################################### +# +# A class containing the characteristics of a particular programming language for basic support within Natural Docs. +# Also serves as a base class for languages that break from general conventions, such as not having parameter lists use +# parenthesis and commas. +# +############################################################################### + +# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure +# Natural Docs is licensed under the GPL + +use strict; +use integer; + +package NaturalDocs::Languages::Simple; + +use base 'NaturalDocs::Languages::Base'; +use base 'Exporter'; + +our @EXPORT = ( 'ENDER_ACCEPT', 'ENDER_IGNORE', 'ENDER_ACCEPT_AND_CONTINUE', 'ENDER_REVERT_TO_ACCEPTED' ); + + +use NaturalDocs::DefineMembers 'LINE_COMMENT_SYMBOLS', 'LineCommentSymbols()', 'SetLineCommentSymbols() duparrayref', + 'BLOCK_COMMENT_SYMBOLS', 'BlockCommentSymbols()', + 'SetBlockCommentSymbols() duparrayref', + 'PROTOTYPE_ENDERS', + 'LINE_EXTENDER', 'LineExtender()', 'SetLineExtender()', + 'PACKAGE_SEPARATOR', 'PackageSeparator()', + 'PACKAGE_SEPARATOR_WAS_SET', 'PackageSeparatorWasSet()', + 'ENUM_VALUES', 'EnumValues()', + 'ENUM_VALUES_WAS_SET', 'EnumValuesWasSet()'; + +# +# Function: New +# +# Creates and returns a new object. +# +# Parameters: +# +# name - The name of the language. 
#
sub New #(name)
{
    my ($class, $languageName) = @_;

    # The base class constructs the object; this class only fills in its own defaults afterwards.
    my $self = $class->SUPER::New($languageName);

    $self->[PACKAGE_SEPARATOR] = '.';
    $self->[ENUM_VALUES] = ::ENUM_GLOBAL();

    return $self;
}


#
#   Functions: Members
#
#   LineCommentSymbols - Returns an arrayref of symbols that start a line comment, or undef if none.
#   SetLineCommentSymbols - Replaces the arrayref of symbols that start a line comment.
#   BlockCommentSymbols - Returns an arrayref of start/end symbol pairs that specify a block comment, or undef if none.
#                                       Pairs are specified with two consecutive array entries.
#   SetBlockCommentSymbols - Replaces the arrayref of start/end symbol pairs that specify a block comment.  Pairs are
#                                            specified with two consecutive array entries.
#   LineExtender - Returns the symbol to ignore a line break in languages where line breaks are significant.
#   SetLineExtender - Replaces the symbol to ignore a line break in languages where line breaks are significant.
#   PackageSeparator - Returns the package separator symbol.
#   PackageSeparatorWasSet - Returns whether the package separator symbol was ever changed from the default.
#

#
#   Function: SetPackageSeparator
#
#   Replaces the language's package separator string and records that it was explicitly set.
#
#   Parameters:
#
#       separator - The new package separator string.
#
sub SetPackageSeparator #(separator)
{
    my $self = shift;
    my $newSeparator = shift;

    $self->[PACKAGE_SEPARATOR] = $newSeparator;

    # Kept as the final statement so the sub's implicit return value is unchanged.
    $self->[PACKAGE_SEPARATOR_WAS_SET] = 1;
}


#
#   Functions: Members
#
#   EnumValues - Returns the <EnumValuesType> that describes how the language handles enums.
#   EnumValuesWasSet - Returns whether <EnumValues> was ever changed from the default.


#
#   Function: SetEnumValues
#   Replaces the <EnumValuesType> that describes how the language handles enums.
#
sub SetEnumValues #(EnumValuesType newBehavior)
{
    my ($self, $behavior) = @_;

    $self->[ENUM_VALUES] = $behavior;
    $self->[ENUM_VALUES_WAS_SET] = 1;
}


#
#   Function: PrototypeEndersFor
#
#   Returns an arrayref of prototype ender symbols for the passed <TopicType>, or undef if none.
#
sub PrototypeEndersFor #(type)
{
    my ($self, $type) = @_;

    if (defined $self->[PROTOTYPE_ENDERS]) {
        return $self->[PROTOTYPE_ENDERS]->{$type};
    }
    else {
        # Explicit undef (rather than a bare return) so the result is a single value in any calling context, as before.
        return undef;
    }
}


#
#   Function: SetPrototypeEndersFor
#
#   Replaces the arrayref of prototype ender symbols for the passed <TopicType>.  Passing undef for the enders removes
#   the entry for that type.  The passed array is copied, so later changes to it do not affect the stored enders.
#
sub SetPrototypeEndersFor #(type, enders)
{
    my ($self, $type, $enders) = @_;

    if (!defined $self->[PROTOTYPE_ENDERS]) {
        $self->[PROTOTYPE_ENDERS] = { };
    }

    if (!defined $enders) {
        delete $self->[PROTOTYPE_ENDERS]->{$type};
    }
    else {
        $self->[PROTOTYPE_ENDERS]->{$type} = [ @$enders ];
    }
}




###############################################################################
# Group: Parsing Functions


#
#   Function: ParseFile
#
#   Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>
#   and all other sections to <OnCode()>.  If the language's name is 'Text File', the entire file is sent to
#   <NaturalDocs::Parser->OnComment()> as a single comment starting at line 1.
#
#   Parameters:
#
#       sourceFile - The <FileName> of the source file to parse.
#       topicsList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
#
#   Returns:
#
#       Since this class cannot automatically document the code or generate a scope record, it always returns ( undef, undef ).
#
#   Dies if the source file cannot be opened.
#
sub ParseFile #(sourceFile, topicsList)
{
    my ($self, $sourceFile, $topicsList) = @_;

    # Three-argument open keeps the file name from ever being interpreted as part of the mode, and the lexical handle
    # is released automatically if one of the parser callbacks dies.
    open(my $sourceFileHandle, '<', $sourceFile)
        or die "Couldn't open input file " . $sourceFile . ": " . $! . "\n";

    my @commentLines;
    my @codeLines;
    my $lastCommentTopicCount = 0;

    if ($self->Name() eq 'Text File') {
        my $line = <$sourceFileHandle>;

        # On the very first line, remove a Unicode BOM if present.  Information on it available at:
        # http://www.unicode.org/faq/utf_bom.html#BOM
        # The line is undef when the file is empty.
        if (defined $line) {
            $line =~ s/^\xEF\xBB\xBF//;
        }

        # Test definedness rather than truth so a final line of "0" without a line break isn't dropped.
        while (defined $line) {
            ::XChomp(\$line);
            push @commentLines, $line;
            $line = <$sourceFileHandle>;
        }

        NaturalDocs::Parser->OnComment(\@commentLines, 1);
    }

    else {
        my $line = <$sourceFileHandle>;
        my $lineNumber = 1;

        # On the very first line, remove a Unicode BOM if present.  Information on it available at:
        # http://www.unicode.org/faq/utf_bom.html#BOM
        # The line is undef when the file is empty.
        if (defined $line) {
            $line =~ s/^\xEF\xBB\xBF//;
        }

        while (defined $line) {
            ::XChomp(\$line);


            # Retrieve single line comments.  This leaves $line at the next line.

            if ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols())) {
                for (;;) {
                    push @commentLines, $line;
                    $line = <$sourceFileHandle>;

                    # End of file ends the comment.
                    if (!defined $line) {
                        last;
                    }

                    ::XChomp(\$line);

                    # So does the first line that doesn't start with a line comment symbol.
                    if (!$self->StripOpeningSymbols(\$line, $self->LineCommentSymbols())) {
                        last;
                    }
                }
            }


            # Retrieve multiline comments.  This leaves $line at the next line.

            elsif (my $closingSymbol = $self->StripOpeningBlockSymbols(\$line, $self->BlockCommentSymbols())) {
                # Note that it is possible for a multiline comment to start correctly but not end so.  We want those comments
                # to stay in the code.  For example, look at this prototype with this splint annotation:
                #
                # int get_array(integer_t id,
                #                    /*@out@*/ array_t array);
                #
                # The annotation starts correctly but doesn't end so because it is followed by code on the same line.

                my $lineRemainder;

                for (;;) {
                    $lineRemainder = $self->StripClosingSymbol(\$line, $closingSymbol);

                    push @commentLines, $line;

                    # If we found an end comment symbol...
                    if (defined $lineRemainder) {
                        last;
                    }

                    $line = <$sourceFileHandle>;

                    if (!defined $line) {
                        last;
                    }

                    ::XChomp(\$line);
                }

                # $lineRemainder is still undef if the file ended before a closing symbol was found.  Such a comment is
                # kept as a comment, exactly as a closing symbol followed only by whitespace would be.
                if (defined $lineRemainder && $lineRemainder !~ /^[ \t]*$/) {
                    # If there was something past the closing symbol this wasn't an acceptable comment, so move the lines
                    # to code.
                    push @codeLines, @commentLines;
                    @commentLines = ( );
                }

                $line = <$sourceFileHandle>;
            }


            # Otherwise just add it to the code.

            else {
                push @codeLines, $line;
                $line = <$sourceFileHandle>;
            }


            # If there were comments, send them to Parser->OnComment().

            if (scalar @commentLines) {
                # First process any code lines before the comment.
                if (scalar @codeLines) {
                    $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
                    $lineNumber += scalar @codeLines;
                    @codeLines = ( );
                }

                $lastCommentTopicCount = NaturalDocs::Parser->OnComment(\@commentLines, $lineNumber);
                $lineNumber += scalar @commentLines;
                @commentLines = ( );
            }

        }  # while (defined $line)


        # Clean up any remaining code.
        if (scalar @codeLines) {
            $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
            @codeLines = ( );
        }

    }

    close($sourceFileHandle);

    return ( undef, undef );
}


#
#   Function: OnCode
#
#   Called whenever a section of code is encountered by the parser.  Is used to find the prototype of the last topic created.
#
#   Parameters:
#
#       codeLines - The source code as an arrayref of lines.
#       codeLineNumber - The line number of the first line of code.
#       topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
#       lastCommentTopicCount - The number of Natural Docs topics that were created by the last comment.
#
sub OnCode #(codeLines, codeLineNumber, topicList, lastCommentTopicCount)
{
    my ($self, $codeLines, $codeLineNumber, $topicList, $lastCommentTopicCount) = @_;

    # Nothing to do unless the last comment created topics and the newest topic's type has prototype enders.
    return if !$lastCommentTopicCount;

    my $enders = $self->PrototypeEndersFor($topicList->[-1]->Type());
    return if !defined $enders;

    # Maps each opening bracket to the closing bracket that balances it.
    my %closerFor = ( '(' => ')', '[' => ']', '{' => '}', '<' => '>' );

    # Skip all blank lines before a prototype.
    my $firstLine = 0;

    while ($firstLine < scalar @$codeLines && $codeLines->[$firstLine] =~ /^[ \t]*$/) {
        $firstLine++;
    }

    my $prototype;       # Everything consumed so far, one whole token at a time.
    my @tokens;          # Accumulates across lines; each is a single bracket character or a run of other text.
    my $tokenIndex = 0;
    my @brackets;        # Stack of the opening brackets we are currently inside.

    # Add prototype lines until we reach the end of the prototype or the end of the code lines.
    for my $lineIndex ($firstLine .. $#$codeLines) {
        my $line = $self->RemoveLineExtender($codeLines->[$lineIndex] . "\n");

        push @tokens, $line =~ /([^\(\)\[\]\{\}\<\>]+|.)/g;

        while ($tokenIndex < scalar @tokens) {
            my $token = $tokens[$tokenIndex];

            # If we're not inside brackets, check for ender symbols.
            if (!scalar @brackets) {
                my $startingIndex = 0;
                my $testPrototype;

                for (;;) {
                    my ($enderIndex, $ender) = ::FindFirstSymbol($token, $enders, $startingIndex);

                    last if $enderIndex == -1;

                    # The prototype is only copied once an ender symbol is actually found in a token, so it isn't
                    # duplicated for every single token.
                    if (!defined $testPrototype) {
                        $testPrototype = $prototype;
                    }

                    $testPrototype .= substr($token, $startingIndex, $enderIndex - $startingIndex);

                    my $enderResult;

                    # If the ender is all text and the character preceding or following it is as well, ignore it.
                    if ($ender =~ /^[a-z0-9]+$/i &&
                        ( ($enderIndex > 0 && substr($token, $enderIndex - 1, 1) =~ /^[a-z0-9_]$/i) ||
                           substr($token, $enderIndex + length($ender), 1) =~ /^[a-z0-9_]$/i ) ) {
                        $enderResult = ENDER_IGNORE();
                    }
                    else {
                        $enderResult = $self->OnPrototypeEnd($topicList->[-1]->Type(), \$testPrototype, $ender);
                    }

                    if ($enderResult == ENDER_REVERT_TO_ACCEPTED()) {
                        return;
                    }

                    if ($enderResult != ENDER_IGNORE()) {
                        # ENDER_ACCEPT or ENDER_ACCEPT_AND_CONTINUE

                        my $titleInPrototype = $topicList->[-1]->Title();

                        # Strip parentheses so Function(2) and Function(int, int) will still match Function(anything).
                        $titleInPrototype =~ s/[\t ]*\([^\(]*$//;

                        # The candidate is only stored if the topic's title appears somewhere in it.
                        if (index($testPrototype, $titleInPrototype) != -1) {
                            $topicList->[-1]->SetPrototype( $self->NormalizePrototype($testPrototype) );
                        }

                        if ($enderResult == ENDER_ACCEPT()) {
                            return;
                        }
                    }

                    # ENDER_IGNORE and ENDER_ACCEPT_AND_CONTINUE both step over the ender and keep scanning this token.
                    $testPrototype .= $ender;
                    $startingIndex = $enderIndex + length($ender);
                }
            }

            # If we are inside brackets, check for the closing symbol matching the innermost one.
            elsif ($closerFor{ $brackets[-1] } eq $token) {
                pop @brackets;
            }

            # Check for opening brackets.
            if ($token =~ /^[\(\[\{\<]$/) {
                push @brackets, $token;
            }

            $prototype .= $token;
            $tokenIndex++;
        }
    }

    # If we got out of that loop by running out of lines, there was no prototype.
}


use constant ENDER_ACCEPT => 1;
use constant ENDER_IGNORE => 2;
use constant ENDER_ACCEPT_AND_CONTINUE => 3;
use constant ENDER_REVERT_TO_ACCEPTED => 4;

#
#   Function: OnPrototypeEnd
#
#   Called whenever the end of a prototype is found so that there's a chance for derived classes to mark false positives.
#   This implementation accepts every ender.
#
#   Parameters:
#
#       type - The <TopicType> of the prototype.
#       prototypeRef - A reference to the prototype so far, minus the ender in dispute.
#       ender - The ender symbol.
#
#   Returns:
#
#       ENDER_ACCEPT - The ender is accepted and the prototype is finished.
#       ENDER_IGNORE - The ender is rejected and parsing should continue.  Note that the prototype will be rejected as a
#                                  whole if all enders are ignored before reaching the end of the code.
#       ENDER_ACCEPT_AND_CONTINUE - The ender is accepted so the prototype may stand as is.  However, the prototype
#                                                          might also continue on so continue parsing.  If there is no accepted ender
#                                                          between here and the end of the code this version will be accepted instead.
#       ENDER_REVERT_TO_ACCEPTED - The expedition from ENDER_ACCEPT_AND_CONTINUE failed.  Use the last accepted
#                                                        version and end parsing.
#
sub OnPrototypeEnd #(type, prototypeRef, ender)
{
    return ENDER_ACCEPT();
}


#
#   Function: RemoveLineExtender
#
#   If the passed line has a line extender, returns it without the extender or the line break that follows.  If it doesn't,
#   or there are no line extenders defined, returns the passed line unchanged.
#
#   Parameters:
#
#       line - The line to check, including its trailing line break.
#
sub RemoveLineExtender #(line)
{
    my ($self, $line) = @_;

    my $extender = $self->LineExtender();
    return $line if !defined $extender;

    # Only the last occurrence can be a real extender, since nothing but whitespace may follow it.
    my $extenderIndex = rindex($line, $extender);
    return $line if $extenderIndex == -1;

    my $afterExtender = substr($line, $extenderIndex + length($extender));

    if ($afterExtender =~ /^[ \t]*\n$/) {
        # The extender, trailing whitespace, and line break are replaced by a single space.
        $line = substr($line, 0, $extenderIndex) . ' ';
    }

    return $line;
}


1;