diff options
| author | Magnus Auvinen <magnus.auvinen@gmail.com> | 2008-08-02 08:21:29 +0000 |
|---|---|---|
| committer | Magnus Auvinen <magnus.auvinen@gmail.com> | 2008-08-02 08:21:29 +0000 |
| commit | 61bfe2d70cae6be8c4086a210a5451135ccca9ea (patch) | |
| tree | 62bf7808b1b2bfe5f56fe1e329871fb0991d0687 /docs/tool/Modules/NaturalDocs/Languages/ActionScript.pm | |
| parent | a13b94f9e0bca8ea892311d9d9e0c0bc48616ea7 (diff) | |
| download | zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.tar.gz zcatch-61bfe2d70cae6be8c4086a210a5451135ccca9ea.zip | |
added doc tool
Diffstat (limited to 'docs/tool/Modules/NaturalDocs/Languages/ActionScript.pm')
| -rw-r--r-- | docs/tool/Modules/NaturalDocs/Languages/ActionScript.pm | 1473 |
1 files changed, 1473 insertions, 0 deletions
# diff --git a/docs/tool/Modules/NaturalDocs/Languages/ActionScript.pm b/docs/tool/Modules/NaturalDocs/Languages/ActionScript.pm
# new file mode 100644
# index 00000000..a55abaf2
# --- /dev/null
# +++ b/docs/tool/Modules/NaturalDocs/Languages/ActionScript.pm
# @@ -0,0 +1,1473 @@
###############################################################################
#
#   Class: NaturalDocs::Languages::ActionScript
#
###############################################################################
#
#   The Flash ActionScript language handler.  It derives from <NaturalDocs::Languages::Advanced>.
#
###############################################################################

# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
# Natural Docs is licensed under the GPL

package NaturalDocs::Languages::ActionScript;

use strict;
use integer;

use base 'NaturalDocs::Languages::Advanced';


################################################################################
# Group: Constants and Types


#
#   Constants: XML Tag Type
#
#   The kinds of XML tag reported by the XML literal skipping code.
#
#   XML_OPENING_TAG - An opening tag, such as <tag>.
#   XML_CLOSING_TAG - A closing tag, such as </tag>.
#   XML_SELF_CONTAINED_TAG - A tag that opens and closes itself, such as <tag />.
#
use constant {
    XML_OPENING_TAG        => 1,
    XML_CLOSING_TAG        => 2,
    XML_SELF_CONTAINED_TAG => 3,
};


################################################################################
# Group: Package Variables

#
#   hash: classModifiers
#   An existence hash of every keyword accepted as a modifier in front of a class or interface declaration.  The keys are
#   in all lowercase.
#
my %classModifiers = map { ($_ => 1) } qw( dynamic intrinsic final internal public );

#
#   hash: memberModifiers
#   An existence hash of all the acceptable class member modifiers.  The keys are in all lowercase.
#
my %memberModifiers = map { ($_ => 1) } qw( public private protected static internal override );


#
#   hash: declarationEnders
#   An existence hash of every token that can terminate a declaration.  It is needed because statements are not required to
#   end with a semicolon.  The keys are in all lowercase.
#
my %declarationEnders = map { ($_ => 1) }
    ( ';', '}', '{',
      qw( public private protected static internal dynamic intrinsic final override
          class interface var function const namespace import ) );


#
#   var: isEscaped
#   Whether the file currently being parsed uses escapement.  <ParseFile()> sets it from the file's extension and
#   <PreprocessFile()> reads it.
#
my $isEscaped;



################################################################################
# Group: Interface Functions


#
#   Function: PackageSeparator
#   Returns the symbol that separates package levels, which is a dot.
#
sub PackageSeparator
{
    return '.';
}


#
#   Function: EnumValues
#   Returns the <EnumValuesType> describing how the language scopes enum values.
#
sub EnumValues
{
    return ::ENUM_GLOBAL();
}


#
#   Function: ParseParameterLine
#
#   Converts one parameter line of a prototype into a <NaturalDocs::Languages::Prototype::Parameter> object.
#
#   Parameters:
#
#       line - The text of the parameter line.
#
sub ParseParameterLine #(line)
{
    my ($self, $line) = @_;

    # Everything that isn't a rest parameter ("... name") is handled as a Pascal-style parameter.
    my ($restName) = ($line =~ /^ ?\.\.\.\ (.+)$/)
        or return $self->ParsePascalParameterLine($line);

    # The name is deliberately stored in the type field and "..." in the name field.  That is the reverse of what they are,
    # but the source writes them in the opposite order from other parameters and the output should match the source.
    return NaturalDocs::Languages::Prototype::Parameter->New($restName, undef, '...', undef, undef, undef);
}


#
#   Function: TypeBeforeParameter
#   Returns whether prototypes put the type in front of the parameter name, which they do not.
#
sub TypeBeforeParameter
{
    return 0;
}


#
#   Function: PreprocessFile
#
#   Does nothing unless <isEscaped> is set.  For an escaped file, every line is rewritten in place so that only the content of
#   mx:Script sections and of XML comments outside them survives.
#
#   - XML comments keep their text, but "<!--" becomes "\x1C\x1D\x1E\x1F" and "-->" becomes "\x1F\x1E\x1D".  The replacements
#     have the same lengths as the symbols they stand in for.
#   - The end of each script section is replaced with "; ".
#   - Processing instructions, CDATA sections outside of scripts, and all other markup are removed.
#
#   Parameters:
#
#       lines - A reference to the array of lines in the file.  It is modified in place.
#
sub PreprocessFile
{
    my ($self, $lines) = @_;

    return if !$isEscaped;

    use constant {
        MODE_UNESCAPED_REGULAR     => 1,
        MODE_UNESCAPED_PI          => 2,
        MODE_UNESCAPED_CDATA       => 3,
        MODE_UNESCAPED_COMMENT     => 4,
        MODE_ESCAPED_UNKNOWN_CDATA => 5,
        MODE_ESCAPED_CDATA         => 6,
        MODE_ESCAPED_NO_CDATA      => 7,
    };

    # The mode deliberately carries over from one line to the next.
    my $mode = MODE_UNESCAPED_REGULAR;

    foreach my $line (@$lines) {
        my @pieces = split(/(<[ \t]*\/?[ \t]*mx:Script[^>]*>|<\?|\?>|<\!--|-->|<\!\[CDATA\[|\]\]\>)/, $line);
        my $rebuilt;

        foreach my $piece (@pieces) {
            if ($mode == MODE_UNESCAPED_REGULAR) {
                if ($piece eq '<?') {
                    $mode = MODE_UNESCAPED_PI;
                }
                elsif ($piece eq '<![CDATA[') {
                    $mode = MODE_UNESCAPED_CDATA;
                }
                elsif ($piece eq '<!--') {
                    $mode = MODE_UNESCAPED_COMMENT;
                    $rebuilt .= "\x1C\x1D\x1E\x1F";
                }
                elsif ($piece =~ /^<[ \t]*mx:Script/) {
                    $mode = MODE_ESCAPED_UNKNOWN_CDATA;
                }
            }

            # Processing instructions and unescaped CDATA are discarded up to and including their closing symbol.
            elsif ($mode == MODE_UNESCAPED_PI || $mode == MODE_UNESCAPED_CDATA) {
                my $closer = ($mode == MODE_UNESCAPED_PI) ? '?>' : ']]>';

                if ($piece eq $closer) {
                    $mode = MODE_UNESCAPED_REGULAR;
                }
            }

            elsif ($mode == MODE_UNESCAPED_COMMENT) {
                if ($piece eq '-->') {
                    $mode = MODE_UNESCAPED_REGULAR;
                    $rebuilt .= "\x1F\x1E\x1D";
                }
                else {
                    $rebuilt .= $piece;
                }
            }

            # Just inside a script tag: leading blank pieces are dropped until we learn whether the code is wrapped in CDATA.
            elsif ($mode == MODE_ESCAPED_UNKNOWN_CDATA) {
                if ($piece eq '<![CDATA[') {
                    $mode = MODE_ESCAPED_CDATA;
                }
                elsif ($piece =~ /^<[ \t]*\/[ \t]*mx:Script/) {
                    $mode = MODE_UNESCAPED_REGULAR;
                    $rebuilt .= '; ';
                }
                elsif ($piece !~ /^[ \t]*$/) {
                    $mode = MODE_ESCAPED_NO_CDATA;
                    $rebuilt .= $piece;
                }
            }

            # MODE_ESCAPED_CDATA or MODE_ESCAPED_NO_CDATA.  They differ only in which symbol ends the script code.
            else {
                my $endsCode = ($mode == MODE_ESCAPED_CDATA) ? ($piece eq ']]>')
                                                             : ($piece =~ /^<[ \t]*\/[ \t]*mx:Script/);

                if ($endsCode) {
                    $mode = MODE_UNESCAPED_REGULAR;
                    $rebuilt .= '; ';
                }
                else {
                    $rebuilt .= $piece;
                }
            }
        }

        # $line aliases the array element, so this replaces the line in the caller's array.
        $line = $rebuilt;
    }
}


#
#   Function: ParseFile
#
#   Parses a source file.  Comments acceptable for documentation are sent to <NaturalDocs::Parser->OnComment()>, and the
#   remaining tokens are scanned for imports, classes, functions, and variables.
#
#   Parameters:
#
#       sourceFile - The <FileName> to parse.
#       topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
#
#   Returns:
#
#       The array ( autoTopics, scopeRecord ).
#
#       autoTopics - An arrayref of automatically generated topics from the file, or undef if none.
#       scopeRecord - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChanges>, or undef if none.
#
sub ParseFile #(sourceFile, topicsList)
{
    my ($self, $sourceFile, $topicsList) = @_;

    # The \x1# comment symbols are the ones PreprocessFile() substitutes for XML comments in escaped files.
    my @parseParameters = ( [ '//' ], [ '/*', '*/', "\x1C\x1D\x1E\x1F", "\x1F\x1E\x1D" ], [ '///' ], [ '/**', '*/' ] );

    $isEscaped = (lc(NaturalDocs::File->ExtensionOf($sourceFile)) eq 'mxml');

    $self->ParseForCommentsAndTokens($sourceFile, @parseParameters);

    my $tokens = $self->Tokens();
    my $index = 0;
    my $lineNumber = 1;

    while ($index < scalar @$tokens) {
        # Each of these advances the position itself when it recognizes something.
        next if ( $self->TryToSkipWhitespace(\$index, \$lineNumber) ||
                  $self->TryToGetImport(\$index, \$lineNumber) ||
                  $self->TryToGetClass(\$index, \$lineNumber) ||
                  $self->TryToGetFunction(\$index, \$lineNumber) ||
                  $self->TryToGetVariable(\$index, \$lineNumber) );

        my $token = $tokens->[$index];

        if ($token eq '{') {
            $self->StartScope('}', $lineNumber, undef, undef, undef);
            $index++;
        }
        elsif ($token eq '}') {
            if ($self->ClosingScopeSymbol() eq '}') {
                $self->EndScope($lineNumber);
            }

            $index++;
        }
        else {
            $self->SkipToNextStatement(\$index, \$lineNumber);
        }
    }


    # The tokens aren't needed past this point.
    $self->ClearTokens();


    my $autoTopics = $self->AutoTopics();

    # An empty scope record is reported as undef.
    my $scopeRecord = $self->ScopeRecord();
    if (defined $scopeRecord && !scalar @$scopeRecord) {
        $scopeRecord = undef;
    }

    return ( $autoTopics, $scopeRecord );
}



################################################################################
# Group: Statement Parsing Functions
# All functions here assume that the current position is at the beginning of a statement.
#
# Note for developers: I am well aware that the code in these functions do not check if we're past the end of the tokens as
# often as it should.  We're making use of the fact that Perl will always return undef in these cases to keep the code simpler.


#
#   Function: TryToGetIdentifier
#
#   Determines whether the position is at an identifier, and if so, skips it and returns the complete identifier as a string.
#   Returns undef otherwise.
#
#   Parameters:
#
#       indexRef - A reference to the current token index.  It is only changed when an identifier is returned.
#       lineNumberRef - A reference to the current line number.  This function never changes it.
#       allowStar - If set, allows the last identifier to be a star.
#
sub TryToGetIdentifier #(indexRef, lineNumberRef, allowStar)
{
    my ($self, $indexRef, $lineNumberRef, $allowStar) = @_;
    my $tokens = $self->Tokens();

    use constant {
        MODE_IDENTIFIER_START => 1,
        MODE_IN_IDENTIFIER    => 2,
        MODE_AFTER_STAR       => 3,
    };

    # Work on a copy of the position so the caller's index is untouched on failure.
    my $position = $$indexRef;
    my $state = MODE_IDENTIFIER_START;
    my $collected;

    TOKEN:
    while ($position < scalar @$tokens) {
        my $token = $tokens->[$position];

        # A star must be the final segment, so nothing identifier-like may follow it.
        if ($state == MODE_AFTER_STAR) {
            return undef if ($token =~ /^[a-z0-9\$\_\.]/i);
            last TOKEN;
        }

        # At the start of the identifier or right after a dot, a new segment is required.
        if ($state == MODE_IDENTIFIER_START) {
            if ($token =~ /^[a-z\$\_]/i) {
                $state = MODE_IN_IDENTIFIER;
            }
            elsif ($allowStar && $token eq '*') {
                $state = MODE_AFTER_STAR;
            }
            else {
                return undef;
            }

            $collected .= $token;
            $position++;
            next TOKEN;
        }

        # MODE_IN_IDENTIFIER: a dot opens another segment, more identifier characters extend this one, and anything
        # else ends the identifier.
        if ($token eq '.') {
            $state = MODE_IDENTIFIER_START;
        }
        elsif ($token !~ /^[a-z0-9\$\_]/i) {
            last TOKEN;
        }

        $collected .= $token;
        $position++;
    }

    # Still waiting for a segment means there were no tokens at all or they ran out right after a dot.
    return undef if ($state == MODE_IDENTIFIER_START);

    $$indexRef = $position;
    return $collected;
}


#
#   Function: TryToGetImport
#
#   Determines whether the position is at a import statement, and if so, adds it as a Using statement to the current scope, skips
#   it, and returns true.
#
#   Parameters:
#
#       indexRef - A reference to the current token index.
#       lineNumberRef - A reference to the current line number.
#
#   Returns:
#
#       1 if an import statement was consumed, in which case both references are moved past it.  Otherwise undef, and
#       neither reference is changed.
#
sub TryToGetImport #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # Scan with private copies so nothing is committed unless the whole statement is recognized.
    my ($position, $line) = ($$indexRef, $$lineNumberRef);

    return undef unless ($tokens->[$position] eq 'import');

    $position++;
    $self->TryToSkipWhitespace(\$position, \$line);

    my $importPath = $self->TryToGetIdentifier(\$position, \$line, 1);
    return undef if (!$importPath);


    # Importing is implemented by removing the last package level and treating the rest as a using.  "import p1.p2.p3"
    # therefore makes p1.p2 the using path, which is over-tolerant but acceptable.  "import p1.p2.*" is handled the same way,
    # where it is not over-tolerant.  A path without a dot has nothing worth adding.

    if ($importPath =~ /\./) {
        $importPath =~ s/\.[^\.]+$//;
        $self->AddUsing( NaturalDocs::SymbolString->FromText($importPath) );
    }

    ($$indexRef, $$lineNumberRef) = ($position, $line);

    return 1;
}


#
#   Function: TryToGetClass
#
#   Determines whether the position is at a class declaration statement, and if so, generates a topic for it, skips it, and
#   returns true.
#
#   Supported Syntaxes:
#
#       - Classes
#       - Interfaces
#       - Classes and interfaces with _global
#
#   Parameters:
#
#       indexRef - A reference to the current token index.
#       lineNumberRef - A reference to the current line number.
#
#   Returns:
#
#       1 if a declaration was found, in which case the references are moved past its opening brace and a scope ending at
#       '}' is started for the class.  Otherwise undef, and neither reference is changed.
#
sub TryToGetClass #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $index = $$indexRef;
    my $lineNumber = $$lineNumberRef;

    # Skip over any class modifiers.  Their values aren't used for anything.
    while ($tokens->[$index] =~ /^[a-z]/i &&
           exists $classModifiers{lc($tokens->[$index])} ) {
        $index++;
        $self->TryToSkipWhitespace(\$index, \$lineNumber);
    }

    my $type;

    if ($tokens->[$index] eq 'class' || $tokens->[$index] eq 'interface') {
        $type = $tokens->[$index];

        $index++;
        $self->TryToSkipWhitespace(\$index, \$lineNumber);
    }
    else {
        return undef;
    }

    my $className = $self->TryToGetIdentifier(\$index, \$lineNumber);

    if (!$className) {
        return undef;
    }

    $self->TryToSkipWhitespace(\$index, \$lineNumber);

    my @parents;

    if ($tokens->[$index] eq 'extends') {
        $index++;
        $self->TryToSkipWhitespace(\$index, \$lineNumber);

        my $parent = $self->TryToGetIdentifier(\$index, \$lineNumber);
        if (!$parent) {
            return undef;
        }

        push @parents, $parent;

        $self->TryToSkipWhitespace(\$index, \$lineNumber);
    }

    # Only classes can have an implements list, which is comma separated.
    if ($type eq 'class' && $tokens->[$index] eq 'implements') {
        $index++;
        $self->TryToSkipWhitespace(\$index, \$lineNumber);

        for (;;) {
            my $parent = $self->TryToGetIdentifier(\$index, \$lineNumber);
            if (!$parent) {
                return undef;
            }

            push @parents, $parent;

            $self->TryToSkipWhitespace(\$index, \$lineNumber);

            if ($tokens->[$index] ne ',') {
                last;
            }
            else {
                $index++;
                $self->TryToSkipWhitespace(\$index, \$lineNumber);
            }
        }
    }

    if ($tokens->[$index] ne '{') {
        return undef;
    }

    $index++;


    # If we made it this far, we have a valid class declaration.

    my $topicType;

    if ($type eq 'interface') {
        $topicType = ::TOPIC_INTERFACE();
    }
    else {
        $topicType = ::TOPIC_CLASS();
    }

    # The dot has to be escaped.  Otherwise any name that merely starts with "_global" would lose its first eight
    # characters.
    $className =~ s/^_global\.//;

    # The topic is recorded at the line the declaration started on, not the line of the opening brace.
    my $autoTopic = NaturalDocs::Parser::ParsedTopic->New($topicType, $className,
                                                          undef, $self->CurrentUsing(),
                                                          undef,
                                                          undef, undef, $$lineNumberRef);

    $self->AddAutoTopic($autoTopic);
    NaturalDocs::Parser->OnClass($autoTopic->Package());

    foreach my $parent (@parents) {
        NaturalDocs::Parser->OnClassParent($autoTopic->Package(), NaturalDocs::SymbolString->FromText($parent),
                                           undef, $self->CurrentUsing(), ::RESOLVE_ABSOLUTE());
    }

    $self->StartScope('}', $lineNumber, $autoTopic->Package());

    $$indexRef = $index;
    $$lineNumberRef = $lineNumber;

    return 1;
}


#
#   Function: TryToGetFunction
#
#   Determines if the position is on a function declaration, and if so, generates a topic for it, skips it, and returns true.
#
#   Supported Syntaxes:
#
#       - Functions
#       - Constructors
#       - Properties
#       - Functions with _global
#       - Functions with namespaces
#
#   Parameters:
#
#       indexRef - A reference to the current token index.
#       lineNumberRef - A reference to the current line number.
#
#   Returns:
#
#       1 if a declaration was found, in which case the references are moved past it, including its body if it has one.
#       Otherwise undef, and neither reference is changed.
#
sub TryToGetFunction #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $index = $$indexRef;
    my $lineNumber = $$lineNumberRef;

    my $startIndex = $index;
    my $startLine = $lineNumber;

    my $namespace;

    # Walk over the words in front of "function".  Known modifiers are skipped, and the first word that isn't one is taken
    # as the namespace.  A second unknown word ends the walk.
    while ($tokens->[$index] =~ /^[a-z]/i) {
        if ($tokens->[$index] eq 'function') {
            last;
        }

        elsif (exists $memberModifiers{lc($tokens->[$index])}) {
            $index++;
            $self->TryToSkipWhitespace(\$index, \$lineNumber);
        }

        elsif (!$namespace) {
            do {
                $namespace .= $tokens->[$index];
                $index++;
            }
            while ($tokens->[$index] =~ /^[a-z0-9_]/i);

            $self->TryToSkipWhitespace(\$index, \$lineNumber);
        }

        else {
            last;
        }
    }

    if ($tokens->[$index] ne 'function') {
        return undef;
    }
    $index++;

    $self->TryToSkipWhitespace(\$index, \$lineNumber);

    my $type;

    if ($tokens->[$index] eq 'get' || $tokens->[$index] eq 'set') {
        # This can either be a property ("function get Something()") or a function name ("function get()").

        my $nextIndex = $index;
        my $nextLineNumber = $lineNumber;

        $nextIndex++;
        $self->TryToSkipWhitespace(\$nextIndex, \$nextLineNumber);

        if ($tokens->[$nextIndex] eq '(') {
            $type = ::TOPIC_FUNCTION();
            # Ignore the movement and let the code ahead pick it up as the name.
        }
        else {
            $type = ::TOPIC_PROPERTY();
            $index = $nextIndex;
            $lineNumber = $nextLineNumber;
        }
    }
    else {
        $type = ::TOPIC_FUNCTION();
    }

    my $name = $self->TryToGetIdentifier(\$index, \$lineNumber);
    if (!$name) {
        return undef;
    }

    $self->TryToSkipWhitespace(\$index, \$lineNumber);

    if ($tokens->[$index] ne '(') {
        return undef;
    }

    $index++;
    $self->GenericSkipUntilAfter(\$index, \$lineNumber, ')');

    $self->TryToSkipWhitespace(\$index, \$lineNumber);

    # The optional return type only needs to be stepped over.  It becomes part of the prototype string below.
    if ($tokens->[$index] eq ':') {
        $index++;

        $self->TryToSkipWhitespace(\$index, \$lineNumber);

        $self->TryToGetIdentifier(\$index, \$lineNumber, 1);

        $self->TryToSkipWhitespace(\$index, \$lineNumber);
    }


    my $prototype = $self->NormalizePrototype( $self->CreateString($startIndex, $index) );

    if ($tokens->[$index] eq '{') {
        $self->GenericSkip(\$index, \$lineNumber);
    }
    elsif (!exists $declarationEnders{$tokens->[$index]}) {
        return undef;
    }


    my $scope = $self->CurrentScope();

    # The dot has to be escaped so that only a real "_global." prefix is removed and treated as global scope.
    if ($name =~ s/^_global\.//) {
        $scope = undef;
    }
    if ($namespace) {
        $scope = NaturalDocs::SymbolString->Join($scope, $namespace);
    }

    $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New($type, $name,
                                                              $scope, $self->CurrentUsing(),
                                                              $prototype,
                                                              undef, undef, $startLine));


    # We succeeded if we got this far.

    $$indexRef = $index;
    $$lineNumberRef = $lineNumber;

    return 1;
}


#
#   Function: TryToGetVariable
#
#   Determines if the position is on a variable declaration statement, and if so, generates a topic for each variable, skips the
#   statement, and returns true.
#
#   Supported Syntaxes:
#
#       - Variables
#       - Variables with _global
#       - Variables with type * (untyped)
#       - Constants
#       - Variables and constants with namespaces
#
#   Parameters:
#
#       indexRef - A reference to the current token index.
#       lineNumberRef - A reference to the current line number.
#
#   Returns:
#
#       1 if a declaration was found, in which case the references are left on the token that ended it.  Otherwise undef,
#       and neither reference is changed.
#
sub TryToGetVariable #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $index = $$indexRef;
    my $lineNumber = $$lineNumberRef;

    my $startIndex = $index;
    my $startLine = $lineNumber;

    my $namespace;

    # Walk over the words in front of "var" or "const" the same way TryToGetFunction() does for "function".
    while ($tokens->[$index] =~ /^[a-z]/i) {
        if ($tokens->[$index] eq 'var' || $tokens->[$index] eq 'const') {
            last;
        }

        elsif (exists $memberModifiers{lc($tokens->[$index])}) {
            $index++;
            $self->TryToSkipWhitespace(\$index, \$lineNumber);
        }

        elsif (!$namespace) {
            do {
                $namespace .= $tokens->[$index];
                $index++;
            }
            while ($tokens->[$index] =~ /^[a-z0-9_]/i);

            $self->TryToSkipWhitespace(\$index, \$lineNumber);
        }

        else {
            last;
        }
    }

    my $type;

    if ($tokens->[$index] eq 'var') {
        $type = ::TOPIC_VARIABLE();
    }
    elsif ($tokens->[$index] eq 'const') {
        $type = ::TOPIC_CONSTANT();
    }
    else {
        return undef;
    }
    $index++;

    $self->TryToSkipWhitespace(\$index, \$lineNumber);

    # Everything before this point (modifiers, namespace, and the keyword) is shared by all the variables declared.
    my $endTypeIndex = $index;
    my @names;
    my @typeSuffixes;

    for (;;) {
        my $name = $self->TryToGetIdentifier(\$index, \$lineNumber);
        if (!$name) {
            return undef;
        }

        $self->TryToSkipWhitespace(\$index, \$lineNumber);

        # The ": Type" text to append to this variable's prototype, or undef if it has no type.
        my $typeSuffix;

        if ($tokens->[$index] eq ':') {
            $index++;
            $self->TryToSkipWhitespace(\$index, \$lineNumber);

            # If no type name can be read, the suffix is just the colon and space.
            my $typeName = $self->TryToGetIdentifier(\$index, \$lineNumber, 1);
            $typeSuffix = ': ' . (defined $typeName ? $typeName : '');

            $self->TryToSkipWhitespace(\$index, \$lineNumber);
        }

        # Skip the initializer, if any, up to the next comma or declaration ender.
        if ($tokens->[$index] eq '=') {
            do {
                $self->GenericSkip(\$index, \$lineNumber);
            }
            while ($index < scalar @$tokens && $tokens->[$index] ne ',' && !exists $declarationEnders{$tokens->[$index]});
        }

        push @names, $name;
        push @typeSuffixes, $typeSuffix;

        if ($tokens->[$index] eq ',') {
            $index++;
            $self->TryToSkipWhitespace(\$index, \$lineNumber);
        }
        elsif (exists $declarationEnders{$tokens->[$index]}) {
            last;
        }
        else {
            return undef;
        }
    }


    # We succeeded if we got this far.

    my $prototypePrefix = $self->CreateString($startIndex, $endTypeIndex);

    for (my $i = 0; $i < scalar @names; $i++) {
        my $prototype = $self->NormalizePrototype( $prototypePrefix . ' ' . $names[$i] . $typeSuffixes[$i]);
        my $scope = $self->CurrentScope();

        # The dot has to be escaped so that only a real "_global." prefix is removed and treated as global scope.
        if ($names[$i] =~ s/^_global\.//) {
            $scope = undef;
        }
        if ($namespace) {
            $scope = NaturalDocs::SymbolString->Join($scope, $namespace);
        }

        $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New($type, $names[$i],
                                                                  $scope, $self->CurrentUsing(),
                                                                  $prototype,
                                                                  undef, undef, $startLine));
    }

    $$indexRef = $index;
    $$lineNumberRef = $lineNumber;

    return 1;
}



################################################################################
# Group: Low Level Parsing Functions


#
#   Function: GenericSkip
#
#   Advances the position one place through general code.
#
#   - If the position is on a string, it will skip it completely.
#   - If the position is on an opening symbol, it will skip until the past the closing symbol.
#   - If the position is on whitespace (including comments), it will skip it completely.
#   - Otherwise it skips one token.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.
#
sub GenericSkip #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway.
    if ($tokens->[$$indexRef] eq '{') {
        $$indexRef++;
        $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}');
    }
    elsif ($tokens->[$$indexRef] eq '(') {
        $$indexRef++;
        $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ')');
    }
    elsif ($tokens->[$$indexRef] eq '[') {
        $$indexRef++;
        $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ']');
    }

    elsif ($self->TryToSkipWhitespace($indexRef, $lineNumberRef) ||
           $self->TryToSkipString($indexRef, $lineNumberRef) ||
           $self->TryToSkipRegExp($indexRef, $lineNumberRef) ||
           $self->TryToSkipXML($indexRef, $lineNumberRef) ) {
        # Whichever one succeeded has already moved the position.
    }

    else {
        $$indexRef++;
    }
}


#
#   Function: GenericSkipUntilAfter
#
#   Advances the position via <GenericSkip()> until a specific token is reached and passed.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.
#       token - The token to stop after.
#
sub GenericSkipUntilAfter #(indexRef, lineNumberRef, token)
{
    my ($self, $indexRef, $lineNumberRef, $token) = @_;
    my $tokens = $self->Tokens();

    while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token) {
        $self->GenericSkip($indexRef, $lineNumberRef);
    }

    # The token being passed may itself be a line break.
    if ($tokens->[$$indexRef] eq "\n") {
        $$lineNumberRef++;
    }
    $$indexRef++;
}


#
#   Function: IndiscriminateSkipUntilAfterSequence
#
#   Advances the position indiscriminately until a specific token sequence is reached and passed.  Unlike
#   <GenericSkipUntilAfter()>, it gives no special treatment to strings, brackets, or comments.  If the sequence is never
#   found, the position ends up past the last token.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.
#       token, token, ... - The tokens making up the sequence.
#
sub IndiscriminateSkipUntilAfterSequence #(indexRef, lineNumberRef, token, token, ...)
{
    my ($self, $indexRef, $lineNumberRef, @sequence) = @_;
    my $tokens = $self->Tokens();

    while ($$indexRef < scalar @$tokens && !$self->IsAtSequence($$indexRef, @sequence)) {
        if ($tokens->[$$indexRef] eq "\n") {
            $$lineNumberRef++;
        }
        $$indexRef++;
    }

    if ($self->IsAtSequence($$indexRef, @sequence)) {
        $$indexRef += scalar @sequence;

        # Count any line breaks that are part of the sequence itself.
        foreach my $token (@sequence) {
            if ($token eq "\n") {
                $$lineNumberRef++;
            }
        }
    }
}


#
#   Function: SkipToNextStatement
#
#   Advances the position via <GenericSkip()> until the next statement, which is defined as anything in <declarationEnders> not
#   appearing in brackets or strings.  It will always advance at least one token.
#
sub SkipToNextStatement #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    if ($tokens->[$$indexRef] eq ';') {
        $$indexRef++;
    }

    else {
        do {
            $self->GenericSkip($indexRef, $lineNumberRef);
        }
        while ( $$indexRef < scalar @$tokens &&
                !exists $declarationEnders{$tokens->[$$indexRef]} );
    }
}


#
#   Function: TryToSkipRegExp
#
#   If the current position is on a regular expression, skip past it and return true.
#
#   A slash is treated as the start of a regular expression only when the closest preceding token that isn't whitespace starts
#   with one of = ( [ or a comma.  Otherwise, including when nothing precedes it, it is left alone as a divide symbol.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.
#
#   Returns:
#
#       1 if a regular expression was skipped, 0 if not.
#
sub TryToSkipRegExp #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    if ($tokens->[$$indexRef] ne '/') {
        return 0;
    }

    # A slash can either start a regular expression or be a divide symbol.  Skip backwards to see what the previous symbol is.
    my $index = $$indexRef - 1;

    while ($index >= 0 && $tokens->[$index] =~ /^(?: |\t|\n)/) {
        $index--;
    }

    # This must be a character class.  Written without the brackets it would only match the literal text "=([,]", which
    # no token can be, and no regular expression would ever be recognized.
    if ($index < 0 || $tokens->[$index] !~ /^[\=\(\[\,]/) {
        return 0;
    }

    $$indexRef++;

    while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne '/') {
        if ($tokens->[$$indexRef] eq '\\') {
            # Skip the backslash and whatever it escapes, which may be a slash.
            $$indexRef += 2;
        }
        elsif ($tokens->[$$indexRef] eq "\n") {
            $$indexRef++;
            $$lineNumberRef++;
        }
        else {
            $$indexRef++;
        }
    }

    if ($$indexRef < scalar @$tokens) {
        # Pass the closing slash and any flags after it.
        $$indexRef++;

        if ($tokens->[$$indexRef] =~ /^[gimsx]+$/i) {
            $$indexRef++;
        }
    }

    return 1;
}


#
#   Function: TryToSkipXML
#
#   If the current position is on an XML literal, skip past it and return true.
#
#   A < is treated as the start of an XML literal only when the token after it isn't = or <, and the closest preceding token
#   that isn't whitespace starts with one of = ( [ > or a comma.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.
#
#   Returns:
#
#       1 if an XML literal was skipped, 0 if not.
#
sub TryToSkipXML #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    if ($tokens->[$$indexRef] ne '<') {
        return 0;
    }

    # A < can either start an XML literal or be a comparison or shift operator.  First check the next character for << or <=.

    my $index = $$indexRef + 1;

    if ($index < scalar @$tokens && $tokens->[$index] =~ /^[\=\<]$/) {
        return 0;
    }


    # Next try the previous character.  Only spaces, tabs, and line breaks count as whitespace here.

    $index = $$indexRef - 1;

    while ($index >= 0 && $tokens->[$index] =~ /^[ \t\n]/) {
        $index--;
    }

    if ($index < 0 || $tokens->[$index] !~ /^[\=\(\[\,\>]/) {
        return 0;
    }


    # Only handle the tag here if it's not an irregular XML section.
    if (!$self->TryToSkipIrregularXML($indexRef, $lineNumberRef)) {
        my @tagStack;

        my ($tagType, $tagIdentifier) = $self->GetAndSkipXMLTag($indexRef, $lineNumberRef);
        if ($tagType == XML_OPENING_TAG) {
            push @tagStack, $tagIdentifier;
        }

        # Keep consuming tags until the one that opened the literal has been closed.  A closing tag that doesn't match
        # the innermost open tag is ignored.
        while (scalar @tagStack && $$indexRef < scalar @$tokens) {
            $self->SkipToNextXMLTag($indexRef, $lineNumberRef);
            ($tagType, $tagIdentifier) = $self->GetAndSkipXMLTag($indexRef, $lineNumberRef);

            if ($tagType == XML_OPENING_TAG) {
                push @tagStack, $tagIdentifier;
            }
            elsif ($tagType == XML_CLOSING_TAG && $tagIdentifier eq $tagStack[-1]) {
                pop @tagStack;
            }
        }
    }


    return 1;
}


#
#   Function: TryToSkipIrregularXML
#
#   If the current position is on an irregular XML tag, skip past it and return true.  Irregular XML tags are defined as
#
#       CDATA - <![CDATA[ ... ]]>
#       Comments - <!-- ... -->
#       PI - <? ... ?>
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.
#
#   Returns:
#
#       1 if an irregular tag was skipped, 0 if not.
#
sub TryToSkipIrregularXML #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;

    if ($self->IsAtSequence($$indexRef, '<', '!', '[', 'CDATA', '[')) {
        $$indexRef += 5;
        $self->IndiscriminateSkipUntilAfterSequence($indexRef, $lineNumberRef, ']', ']', '>');
        return 1;
    }

    elsif ($self->IsAtSequence($$indexRef, '<', '!', '-', '-')) {
        $$indexRef += 4;
        $self->IndiscriminateSkipUntilAfterSequence($indexRef, $lineNumberRef, '-', '-', '>');
        return 1;
    }

    elsif ($self->IsAtSequence($$indexRef, '<', '?')) {
        $$indexRef += 2;
        $self->IndiscriminateSkipUntilAfterSequence($indexRef, $lineNumberRef, '?', '>');
        return 1;
    }

    else {
        return 0;
    }
}


#
#   Function: GetAndSkipXMLTag
#
#   Processes the XML tag at the current position, moves beyond it, and returns information about it.  Assumes the position is on
#   the opening angle bracket of the tag and the tag is a normal XML tag, not one of the ones handled by
#   <TryToSkipIrregularXML()>.
#
#   Parameters:
#
#       indexRef - A reference to the index of the position of the opening angle bracket.
#       lineNumberRef - A reference to the line number of the position of the opening angle bracket.
#
#   Returns:
#
#       The array ( tagType, identifier ).
#
#       tagType - One of the <XML Tag Type> constants.
#       identifier - The identifier of the tag.  If it's an empty tag (<> or </>), this will be "(anonymous)".
#
sub GetAndSkipXMLTag #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    if ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne '<') {
        die "Tried to call GetAndSkipXMLTag when the position isn't on an opening bracket.";
    }

    # Get the anonymous ones out of the way so we don't have to worry about them below, since they're rather exceptional.

    if ($self->IsAtSequence($$indexRef, '<', '>')) {
        $$indexRef += 2;
        return ( XML_OPENING_TAG, '(anonymous)' );
    }
    elsif ($self->IsAtSequence($$indexRef, '<', '/', '>')) {
        $$indexRef += 3;
        return ( XML_CLOSING_TAG, '(anonymous)' );
    }


    # Grab the identifier.

    my $tagType = XML_OPENING_TAG;
    my $identifier;

    $$indexRef++;

    if ($tokens->[$$indexRef] eq '/') {
        $$indexRef++;
        $tagType = XML_CLOSING_TAG;
    }

    $self->TryToSkipXMLWhitespace($indexRef, $lineNumberRef);


    # The identifier could be a native expression in braces.

    if ($tokens->[$$indexRef] eq '{') {
        my $startOfIdentifier = $$indexRef;

        $$indexRef++;
        $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}');

        $identifier = $self->CreateString($startOfIdentifier, $$indexRef);
    }


    # Otherwise just grab content until whitespace or the end of the tag.  Whitespace means any token starting with a
    # space, tab, or line break, so a name followed by a line break or a run of spaces doesn't absorb the whitespace and
    # the attributes after it.  The whitespace is left for the loop below, which keeps the line number accurate.

    else {
        while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] !~ /^(?:[\/\>]$|[ \t\n])/) {
            $identifier .= $tokens->[$$indexRef];
            $$indexRef++;
        }
    }


    # Skip to the end of the tag.

    while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] !~ /^[\/\>]$/) {
        if ($tokens->[$$indexRef] eq '{') {
            $$indexRef++;
            $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}');
        }

        elsif ($self->TryToSkipXMLWhitespace($indexRef, $lineNumberRef)) {
            # The position has already been moved.
        }

        # We don't need to do special handling for attribute quotes or anything like that because there's no backslashing in
        # XML.  It's all handled with entity characters.
        else {
            $$indexRef++;
        }
    }


    # A slash right before the end turns an opening tag into a self contained one.
    if ($tokens->[$$indexRef] eq '/') {
        if ($tagType == XML_OPENING_TAG) {
            $tagType = XML_SELF_CONTAINED_TAG;
        }

        $$indexRef++;
    }

    if ($tokens->[$$indexRef] eq '>') {
        $$indexRef++;
    }

    if (!$identifier) {
        $identifier = '(anonymous)';
    }


    return ( $tagType, $identifier );
}


#
#   Function: SkipToNextXMLTag
#   Skips to the next normal XML tag.  It will not stop at elements handled by <TryToSkipIrregularXML()>.  Note that if the
#   position is already at an XML tag, it will not move.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.
#
sub SkipToNextXMLTag #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    while ($$indexRef < scalar @$tokens) {
        # Native expressions in braces are skipped as a unit, so a < inside of one doesn't count as a tag.
        if ($tokens->[$$indexRef] eq '{') {
            $$indexRef++;
            $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}');
        }

        elsif ($self->TryToSkipIrregularXML($indexRef, $lineNumberRef)) {
            # The position has already been moved.
        }

        elsif ($tokens->[$$indexRef] eq '<') {
            last;
        }

        else {
            if ($tokens->[$$indexRef] eq "\n") {
                $$lineNumberRef++;
            }

            $$indexRef++;
        }
    }
}


#
#   Function: TryToSkipXMLWhitespace
#   If the current position is on XML whitespace, skip past it and return true.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.  It is incremented for every line break skipped.
#
#   Returns:
#
#       1 if anything was skipped, undef if not.
#
sub TryToSkipXMLWhitespace #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $skippedAny;

    for (; $$indexRef < scalar @$tokens; $$indexRef++) {
        my $token = $tokens->[$$indexRef];

        if ($token eq "\n") {
            $$lineNumberRef++;
        }
        elsif ($token !~ /^[ \t]/) {
            # Not whitespace, so stay on this token.
            last;
        }

        $skippedAny = 1;
    }

    return $skippedAny;
}


#
#   Function: TryToSkipString
#   If the current position is on a string delimiter, skip past the string and return true.
#
#   Parameters:
#
#       indexRef - A reference to the index of the position to start at.
#       lineNumberRef - A reference to the line number of the position.
#
#   Returns:
#
#       Whether the position was at a string.
#
#   Syntax Support:
#
#       - Supports quotes and apostrophes.
#
sub TryToSkipString #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;

    # Try apostrophe strings first and only fall back to quoted strings if that fails.
    my $skippedApostropheString = $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '\'');

    if ($skippedApostropheString) {
        return $skippedApostropheString;
    }

    return $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '"');
}


#
#   Function: TryToSkipWhitespace
#   If the current position is on a whitespace token, a line break token, or a comment, it skips them and returns true.  If there are
#   a number of these in a row, it skips them all.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.  It is incremented for every line break token skipped.
#
#   Returns:
#
#       1 if anything was skipped, undef if not.
#
sub TryToSkipWhitespace #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $skippedAny;

    TOKEN:
    while ($$indexRef < scalar @$tokens) {
        my $token = $tokens->[$$indexRef];

        if ($token =~ /^[ \t]/ || $token eq "\n") {
            if ($token eq "\n") {
                $$lineNumberRef++;
            }

            $$indexRef++;
        }

        # TryToSkipComment() moves the position itself when it finds one.
        elsif (!$self->TryToSkipComment($indexRef, $lineNumberRef)) {
            last TOKEN;
        }

        $skippedAny = 1;
    }

    return $skippedAny;
}


#
#   Function: TryToSkipComment
#   If the current position is on a comment, skip past it and return true.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.
#
sub TryToSkipComment #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;

    # Line comments are tried first.  Multiline comments are only tried if that fails.
    my $skippedLineComment = $self->TryToSkipLineComment($indexRef, $lineNumberRef);

    if ($skippedLineComment) {
        return $skippedLineComment;
    }

    return $self->TryToSkipMultilineComment($indexRef, $lineNumberRef);
}


#
#   Function: TryToSkipLineComment
#   If the current position is on a line comment symbol, which is two slash tokens in a row, skip the rest of the line and
#   return true.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.
#
#   Returns:
#
#       1 if a line comment was skipped, undef if not.
#
sub TryToSkipLineComment #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $position = $$indexRef;

    unless ($tokens->[$position] eq '/' && $tokens->[$position + 1] eq '/') {
        return undef;
    }

    $self->SkipRestOfLine($indexRef, $lineNumberRef);
    return 1;
}


#
#   Function: TryToSkipMultilineComment
#   If the current position is on an opening comment symbol, which is a slash token followed by a star token, skip until after
#   the closing symbol and return true.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.
#
#   Returns:
#
#       1 if a multiline comment was skipped, undef if not.
#
sub TryToSkipMultilineComment #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $position = $$indexRef;

    unless ($tokens->[$position] eq '/' && $tokens->[$position + 1] eq '*') {
        return undef;
    }

    $self->SkipUntilAfter($indexRef, $lineNumberRef, '*', '/');
    return 1;
}


1;