175 lines
5.4 KiB
Ruby
175 lines
5.4 KiB
Ruby
|
module CodeRay
|
||
|
module Scanners
|
||
|
|
||
|
# Scanner for Java.
|
||
|
class Java < Scanner
|
||
|
|
||
|
register_for :java
|
||
|
|
||
|
autoload :BuiltinTypes, 'coderay/scanners/java/builtin_types'
|
||
|
|
||
|
# http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
|
||
|
KEYWORDS = %w[
|
||
|
assert break case catch continue default do else
|
||
|
finally for if instanceof import new package
|
||
|
return switch throw try typeof while
|
||
|
debugger export
|
||
|
] # :nodoc:
|
||
|
RESERVED = %w[ const goto ] # :nodoc:
|
||
|
CONSTANTS = %w[ false null true ] # :nodoc:
|
||
|
MAGIC_VARIABLES = %w[ this super ] # :nodoc:
|
||
|
TYPES = %w[
|
||
|
boolean byte char class double enum float int interface long
|
||
|
short void
|
||
|
] << '[]' # :nodoc: because int[] should be highlighted as a type
|
||
|
DIRECTIVES = %w[
|
||
|
abstract extends final implements native private protected public
|
||
|
static strictfp synchronized throws transient volatile
|
||
|
] # :nodoc:
|
||
|
|
||
|
IDENT_KIND = WordList.new(:ident).
|
||
|
add(KEYWORDS, :keyword).
|
||
|
add(RESERVED, :reserved).
|
||
|
add(CONSTANTS, :predefined_constant).
|
||
|
add(MAGIC_VARIABLES, :local_variable).
|
||
|
add(TYPES, :type).
|
||
|
add(BuiltinTypes::List, :predefined_type).
|
||
|
add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
|
||
|
add(DIRECTIVES, :directive) # :nodoc:
|
||
|
|
||
|
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
|
||
|
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
|
||
|
STRING_CONTENT_PATTERN = {
|
||
|
"'" => /[^\\']+/,
|
||
|
'"' => /[^\\"]+/,
|
||
|
'/' => /[^\\\/]+/,
|
||
|
} # :nodoc:
|
||
|
IDENT = /[a-zA-Z_][A-Za-z_0-9]*/ # :nodoc:
|
||
|
|
||
|
protected
|
||
|
|
||
|
def scan_tokens encoder, options
|
||
|
|
||
|
state = :initial
|
||
|
string_delimiter = nil
|
||
|
package_name_expected = false
|
||
|
class_name_follows = false
|
||
|
last_token_dot = false
|
||
|
|
||
|
until eos?
|
||
|
|
||
|
case state
|
||
|
|
||
|
when :initial
|
||
|
|
||
|
if match = scan(/ \s+ | \\\n /x)
|
||
|
encoder.text_token match, :space
|
||
|
next
|
||
|
|
||
|
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
||
|
encoder.text_token match, :comment
|
||
|
next
|
||
|
|
||
|
elsif package_name_expected && match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
|
||
|
encoder.text_token match, package_name_expected
|
||
|
|
||
|
elsif match = scan(/ #{IDENT} | \[\] /ox)
|
||
|
kind = IDENT_KIND[match]
|
||
|
if last_token_dot
|
||
|
kind = :ident
|
||
|
elsif class_name_follows
|
||
|
kind = :class
|
||
|
class_name_follows = false
|
||
|
else
|
||
|
case match
|
||
|
when 'import'
|
||
|
package_name_expected = :include
|
||
|
when 'package'
|
||
|
package_name_expected = :namespace
|
||
|
when 'class', 'interface'
|
||
|
class_name_follows = true
|
||
|
end
|
||
|
end
|
||
|
encoder.text_token match, kind
|
||
|
|
||
|
elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
|
||
|
encoder.text_token match, :operator
|
||
|
|
||
|
elsif match = scan(/;/)
|
||
|
package_name_expected = false
|
||
|
encoder.text_token match, :operator
|
||
|
|
||
|
elsif match = scan(/\{/)
|
||
|
class_name_follows = false
|
||
|
encoder.text_token match, :operator
|
||
|
|
||
|
elsif check(/[\d.]/)
|
||
|
if match = scan(/0[xX][0-9A-Fa-f]+/)
|
||
|
encoder.text_token match, :hex
|
||
|
elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
|
||
|
encoder.text_token match, :octal
|
||
|
elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
|
||
|
encoder.text_token match, :float
|
||
|
elsif match = scan(/\d+[lL]?/)
|
||
|
encoder.text_token match, :integer
|
||
|
end
|
||
|
|
||
|
elsif match = scan(/["']/)
|
||
|
state = :string
|
||
|
encoder.begin_group state
|
||
|
string_delimiter = match
|
||
|
encoder.text_token match, :delimiter
|
||
|
|
||
|
elsif match = scan(/ @ #{IDENT} /ox)
|
||
|
encoder.text_token match, :annotation
|
||
|
|
||
|
else
|
||
|
encoder.text_token getch, :error
|
||
|
|
||
|
end
|
||
|
|
||
|
when :string
|
||
|
if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
|
||
|
encoder.text_token match, :content
|
||
|
elsif match = scan(/["'\/]/)
|
||
|
encoder.text_token match, :delimiter
|
||
|
encoder.end_group state
|
||
|
state = :initial
|
||
|
string_delimiter = nil
|
||
|
elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
|
||
|
if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
|
||
|
encoder.text_token match, :content
|
||
|
else
|
||
|
encoder.text_token match, :char
|
||
|
end
|
||
|
elsif match = scan(/\\./m)
|
||
|
encoder.text_token match, :content
|
||
|
elsif match = scan(/ \\ | $ /x)
|
||
|
encoder.end_group state
|
||
|
state = :initial
|
||
|
encoder.text_token match, :error
|
||
|
else
|
||
|
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
|
||
|
end
|
||
|
|
||
|
else
|
||
|
raise_inspect 'Unknown state', encoder
|
||
|
|
||
|
end
|
||
|
|
||
|
last_token_dot = match == '.'
|
||
|
|
||
|
end
|
||
|
|
||
|
if state == :string
|
||
|
encoder.end_group state
|
||
|
end
|
||
|
|
||
|
encoder
|
||
|
end
|
||
|
|
||
|
end
|
||
|
|
||
|
end
|
||
|
end
|