177 lines
4.9 KiB
Ruby
177 lines
4.9 KiB
Ruby
|
module CodeRay
|
||
|
module Scanners
|
||
|
|
||
|
class Java < Scanner
|
||
|
|
||
|
include Streamable
|
||
|
register_for :java
|
||
|
helper :builtin_types
|
||
|
|
||
|
# http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
|
||
|
KEYWORDS = %w[
|
||
|
assert break case catch continue default do else
|
||
|
finally for if instanceof import new package
|
||
|
return switch throw try typeof while
|
||
|
debugger export
|
||
|
]
|
||
|
RESERVED = %w[ const goto ]
|
||
|
CONSTANTS = %w[ false null true ]
|
||
|
MAGIC_VARIABLES = %w[ this super ]
|
||
|
TYPES = %w[
|
||
|
boolean byte char class double enum float int interface long
|
||
|
short void
|
||
|
] << '[]' # because int[] should be highlighted as a type
|
||
|
DIRECTIVES = %w[
|
||
|
abstract extends final implements native private protected public
|
||
|
static strictfp synchronized throws transient volatile
|
||
|
]
|
||
|
|
||
|
IDENT_KIND = WordList.new(:ident).
|
||
|
add(KEYWORDS, :keyword).
|
||
|
add(RESERVED, :reserved).
|
||
|
add(CONSTANTS, :pre_constant).
|
||
|
add(MAGIC_VARIABLES, :local_variable).
|
||
|
add(TYPES, :type).
|
||
|
add(BuiltinTypes::List, :pre_type).
|
||
|
add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
|
||
|
add(DIRECTIVES, :directive)
|
||
|
|
||
|
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
|
||
|
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
|
||
|
STRING_CONTENT_PATTERN = {
|
||
|
"'" => /[^\\']+/,
|
||
|
'"' => /[^\\"]+/,
|
||
|
'/' => /[^\\\/]+/,
|
||
|
}
|
||
|
IDENT = /[a-zA-Z_][A-Za-z_0-9]*/
|
||
|
|
||
|
def scan_tokens tokens, options
|
||
|
|
||
|
state = :initial
|
||
|
string_delimiter = nil
|
||
|
import_clause = class_name_follows = last_token_dot = false
|
||
|
|
||
|
until eos?
|
||
|
|
||
|
kind = nil
|
||
|
match = nil
|
||
|
|
||
|
case state
|
||
|
|
||
|
when :initial
|
||
|
|
||
|
if match = scan(/ \s+ | \\\n /x)
|
||
|
tokens << [match, :space]
|
||
|
next
|
||
|
|
||
|
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
||
|
tokens << [match, :comment]
|
||
|
next
|
||
|
|
||
|
elsif import_clause && scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
|
||
|
kind = :include
|
||
|
|
||
|
elsif match = scan(/ #{IDENT} | \[\] /ox)
|
||
|
kind = IDENT_KIND[match]
|
||
|
if last_token_dot
|
||
|
kind = :ident
|
||
|
elsif class_name_follows
|
||
|
kind = :class
|
||
|
class_name_follows = false
|
||
|
else
|
||
|
import_clause = true if match == 'import'
|
||
|
class_name_follows = true if match == 'class' || match == 'interface'
|
||
|
end
|
||
|
|
||
|
elsif scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
|
||
|
kind = :operator
|
||
|
|
||
|
elsif scan(/;/)
|
||
|
import_clause = false
|
||
|
kind = :operator
|
||
|
|
||
|
elsif scan(/\{/)
|
||
|
class_name_follows = false
|
||
|
kind = :operator
|
||
|
|
||
|
elsif check(/[\d.]/)
|
||
|
if scan(/0[xX][0-9A-Fa-f]+/)
|
||
|
kind = :hex
|
||
|
elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
|
||
|
kind = :oct
|
||
|
elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
|
||
|
kind = :float
|
||
|
elsif scan(/\d+[lL]?/)
|
||
|
kind = :integer
|
||
|
end
|
||
|
|
||
|
elsif match = scan(/["']/)
|
||
|
tokens << [:open, :string]
|
||
|
state = :string
|
||
|
string_delimiter = match
|
||
|
kind = :delimiter
|
||
|
|
||
|
elsif scan(/ @ #{IDENT} /ox)
|
||
|
kind = :annotation
|
||
|
|
||
|
else
|
||
|
getch
|
||
|
kind = :error
|
||
|
|
||
|
end
|
||
|
|
||
|
when :string
|
||
|
if scan(STRING_CONTENT_PATTERN[string_delimiter])
|
||
|
kind = :content
|
||
|
elsif match = scan(/["'\/]/)
|
||
|
tokens << [match, :delimiter]
|
||
|
tokens << [:close, state]
|
||
|
string_delimiter = nil
|
||
|
state = :initial
|
||
|
next
|
||
|
elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
|
||
|
if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
|
||
|
kind = :content
|
||
|
else
|
||
|
kind = :char
|
||
|
end
|
||
|
elsif scan(/\\./m)
|
||
|
kind = :content
|
||
|
elsif scan(/ \\ | $ /x)
|
||
|
tokens << [:close, :delimiter]
|
||
|
kind = :error
|
||
|
state = :initial
|
||
|
else
|
||
|
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
|
||
|
end
|
||
|
|
||
|
else
|
||
|
raise_inspect 'Unknown state', tokens
|
||
|
|
||
|
end
|
||
|
|
||
|
match ||= matched
|
||
|
if $CODERAY_DEBUG and not kind
|
||
|
raise_inspect 'Error token %p in line %d' %
|
||
|
[[match, kind], line], tokens
|
||
|
end
|
||
|
raise_inspect 'Empty token', tokens unless match
|
||
|
|
||
|
last_token_dot = match == '.'
|
||
|
|
||
|
tokens << [match, kind]
|
||
|
|
||
|
end
|
||
|
|
||
|
if state == :string
|
||
|
tokens << [:close, state]
|
||
|
end
|
||
|
|
||
|
tokens
|
||
|
end
|
||
|
|
||
|
end
|
||
|
|
||
|
end
|
||
|
end
|