# encoding: utf-8

module CodeRay
module Scanners

  # This scanner is really complex, since Ruby _is_ a complex language!
  #
  # It tries to highlight 100% of all common code,
  # and 90% of strange codes.
  #
  # It is optimized for HTML highlighting, and is not very useful for
  # parsing or pretty printing.
  #
  # For now, I think it's better than the scanners in VIM or Syntax, or
  # any highlighter I was able to find, except Caleb's RubyLexer.
  #
  # I hope it's also better than the rdoc/irb lexer.
  class Ruby < Scanner

    include Streamable

    register_for :ruby
    file_extension 'rb'

    helper :patterns

    # Compatibility shim for interpreters that do not define EncodingError.
    # It derives from StandardError, like the built-in class does, so that a
    # plain +rescue+ treats both the same way.
    if not defined? EncodingError
      EncodingError = Class.new StandardError
    end

  private

    # Tokenizes the scanner's input and appends the result to +tokens+.
    #
    # tokens::  token sink; receives <tt>[text, kind]</tt> pairs for content
    #           and <tt>[:open, kind]</tt> / <tt>[:close, kind]</tt> pairs
    #           around strings, regexps, symbols, heredocs, shell commands
    #           and <tt>#{...}</tt> inline blocks.
    # options:: accepted for interface compatibility; not read by this method.
    #
    # Returns +tokens+.
    #
    # The method is a hand-written state machine. +state+ is either a
    # <tt>patterns::StringState</tt> (we are inside a string-like literal) or
    # one of the symbols <tt>:initial</tt>, <tt>:def_expected</tt>,
    # <tt>:module_expected</tt>, <tt>:undef_expected</tt>,
    # <tt>:undef_comma_expected</tt> and <tt>:alias_expected</tt>.
    #
    # Two flags use a small protocol: a branch stores <tt>:set</tt> to say
    # "true for the *next* token"; after every non-error token the flag is
    # normalized to +true+ (if it was <tt>:set</tt>) or +false+.
    # +value_expected+ can additionally be <tt>:expect_colon</tt> (after
    # <tt>?</tt> or +when+), which disables the <tt>name:</tt> hash-key
    # detection for the following identifier.
    def scan_tokens tokens, options
      if string.respond_to?(:encoding)
        # Encoding-aware strings: normalize the input to UTF-8 once, so the
        # plain (non-/u) regexps can be used throughout.
        unless string.encoding == Encoding::UTF_8
          self.string = string.encode Encoding::UTF_8,
            :invalid => :replace, :undef => :replace, :replace => '?'
        end
        unicode = false
      else
        # No encoding support: switch to the /u regexp variants if the input
        # contains any non-ASCII byte.
        unicode = exist?(/[^\x00-\x7f]/)
      end

      last_token_dot = false      # previous token was "." or "::" (see protocol above)
      value_expected = true       # a value (not an operator) may start here
      heredocs = nil              # pending heredoc StringStates, started at the next newline
      last_state = nil            # string state to resume after a "#$var" / "#@var" escape
      state = :initial
      depth = nil                 # brace depth inside the current #{...} inline block
      inline_block_stack = []     # saved [state, depth, heredocs] of enclosing strings

      patterns = Patterns  # avoid constant lookup

      until eos?
        match = nil
        kind = nil

        if state.instance_of? patterns::StringState
# {{{
          # Inside a string-like literal: everything up to the next
          # interesting character is plain content.
          match = scan_until(state.pattern) || scan_until(/\z/)
          tokens << [match, :content] unless match.empty?
          break if eos?

          if state.heredoc and self[1]  # end of heredoc
            match = getch.to_s
            match << scan_until(/$/) unless eos?
            tokens << [match, :delimiter]
            tokens << [:close, state.type]
            state = state.next_state
            next
          end

          case match = getch

          when state.delim
            if state.paren
              # Nested paired delimiters, e.g. %w( a (b) c ): only the
              # outermost closing delimiter ends the literal.
              state.paren_depth -= 1
              if state.paren_depth > 0
                tokens << [match, :nesting_delimiter]
                next
              end
            end
            tokens << [match, :delimiter]
            if state.type == :regexp and not eos?
              modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
              tokens << [modifiers, :modifier] unless modifiers.empty?
            end
            tokens << [:close, state.type]
            value_expected = false
            state = state.next_state

          when '\\'
            if state.interpreted
              if esc = scan(/ #{patterns::ESCAPE} /ox)
                tokens << [match + esc, :char]
              else
                tokens << [match, :error]
              end
            else
              # Non-interpreted literal: only the delimiter and the backslash
              # itself are treated as escapes.
              case m = getch
              when state.delim, '\\'
                tokens << [match + m, :char]
              when nil
                tokens << [match, :error]
              else
                tokens << [match + m, :content]
              end
            end

          when '#'
            case peek(1)
            when '{'
              # Start of #{...}: save the string context and scan the block
              # as normal code until the matching brace.
              inline_block_stack << [state, depth, heredocs]
              value_expected = true
              state = :initial
              depth = 1
              tokens << [:open, :inline]
              tokens << [match + getch, :inline_delimiter]
            when '$', '@'
              tokens << [match, :escape]
              last_state = state  # scan one token as normal code, then return here
              state = :initial
            else
              raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
            end

          when state.paren
            state.paren_depth += 1
            tokens << [match, :nesting_delimiter]

          when /#{patterns::REGEXP_SYMBOLS}/ox
            tokens << [match, :function]

          else
            raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

          end
          next
# }}}
        else
# {{{
          if match = scan(/[ \t\f]+/)
            kind = :space
            # Swallow following whitespace (including newlines) too, unless a
            # heredoc body has to start at the next newline.
            match << scan(/\s*/) unless eos? || heredocs
            value_expected = true if match.index(?\n)
            tokens << [match, kind]
            next

          elsif match = scan(/\\?\n/)
            kind = :space
            if match == "\n"
              value_expected = true
              state = :initial if state == :undef_comma_expected
            end
            if heredocs
              unscan  # heredoc scanning needs \n at start
              state = heredocs.shift
              tokens << [:open, state.type]
              heredocs = nil if heredocs.empty?
              next
            else
              match << scan(/\s*/) unless eos?
            end
            tokens << [match, kind]
            next

          elsif bol? && match = scan(/\#!.*/)
            tokens << [match, :doctype]
            next

          elsif match = scan(/\#.*/) or
            ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
              kind = :comment
              tokens << [match, kind]
              next

          elsif state == :initial

            # IDENTS #
            if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                      /#{patterns::METHOD_NAME}/o)
              if last_token_dot
                # After "." or "::" a name is a method call unless it is
                # capitalized and not followed by "(".
                kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
              else
                if value_expected != :expect_colon && scan(/:(?= )/)
                  # "name: " is a hash key; emit the key, then the colon as
                  # the current token.
                  tokens << [match, :key]
                  match = ':'
                  kind = :operator
                else
                  kind = patterns::IDENT_KIND[match]
                  if kind == :ident
                    if match[/\A[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
                      kind = :constant
                    end
                  elsif kind == :reserved
                    state = patterns::DEF_NEW_STATE[match]
                    value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
                  end
                end
              end
              value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

            elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
              kind = :ident
              value_expected = :set if check(unicode ? /#{patterns::VALUE_FOLLOWS}/uo :
                                                       /#{patterns::VALUE_FOLLOWS}/o)

            # OPERATORS #
            elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
              if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
                value_expected = :set
              end
              # Group 1 is the empty capture after "." / "::"; it is non-nil
              # only when one of those was matched.
              last_token_dot = :set if self[1]
              kind = :operator
              unless inline_block_stack.empty?
                case match
                when '{'
                  depth += 1
                when '}'
                  depth -= 1
                  if depth == 0  # closing brace of inline block reached
                    state, depth, heredocs = inline_block_stack.pop
                    heredocs = nil if heredocs && heredocs.empty?
                    tokens << [match, :inline_delimiter]
                    kind = :inline
                    match = :close
                  end
                end
              end

            elsif match = scan(/ ['"] /mx)
              tokens << [:open, :string]
              kind = :delimiter
              state = patterns::StringState.new :string, match == '"', match  # important for streaming

            elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                         /#{patterns::INSTANCE_VARIABLE}/o)
              kind = :instance_variable

            elsif value_expected and match = scan(/\//)
              tokens << [:open, :regexp]
              kind = :delimiter
              interpreted = true
              state = patterns::StringState.new :regexp, interpreted, match

            # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
            elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
              # NOTE(review): relies on NUMERIC's first capture group being
              # set only for floats -- confirm against Patterns.
              kind = self[1] ? :float : :integer

            elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                         /#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                # Quoted symbol (:'...' or :"..."): scan the body as a string.
                tokens << [:open, :symbol]
                tokens << [':', :symbol]
                match = delim.chr
                kind = :delimiter
                state = patterns::StringState.new :symbol, delim == ?", match
              else
                kind = :symbol
              end

            elsif match = scan(/ -[>=]? | [+!~^]=? | [*|&]{1,2}=? | >>? /x)
              value_expected = :set
              kind = :operator

            elsif value_expected and match = scan(unicode ? /#{patterns::HEREDOC_OPEN}/uo :
                                                            /#{patterns::HEREDOC_OPEN}/o)
              indented = self[1] == '-'
              quote = self[3]
              delim = self[quote ? 4 : 2]
              kind = patterns::QUOTE_TO_TYPE[quote]
              # The opener is emitted as an empty literal here; the body is
              # queued and scanned once the current line ends.
              tokens << [:open, kind]
              tokens << [match, :delimiter]
              match = :close
              heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
              heredocs ||= []  # create heredocs if empty
              heredocs << heredoc

            elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
              # The fallback block receives the missing key from #fetch.
              kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do |k|
                raise_inspect 'Unknown fancy string: %%%p' % k, tokens
              end
              tokens << [:open, kind]
              state = patterns::StringState.new kind, interpreted, self[2]
              kind = :delimiter

            elsif value_expected and match = scan(unicode ? /#{patterns::CHARACTER}/uo :
                                                            /#{patterns::CHARACTER}/o)
              kind = :integer

            elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
              value_expected = :set
              kind = :operator

            elsif match = scan(/`/)
              if last_token_dot
                # obj.` is a method call, not the start of a shell command.
                kind = :operator
              else
                tokens << [:open, :shell]
                kind = :delimiter
                state = patterns::StringState.new :shell, true, match
              end

            elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                         /#{patterns::GLOBAL_VARIABLE}/o)
              kind = :global_variable

            elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                         /#{patterns::CLASS_VARIABLE}/o)
              kind = :class_variable

            else
              if !unicode && !string.respond_to?(:encoding)
                # check for unicode
                # $DEBUG is switched off temporarily and restored in +ensure+.
                debug, $DEBUG = $DEBUG, false
                begin
                  if check(/./mu).size > 1
                    # seems like we should try again with unicode
                    unicode = true
                  end
                rescue
                  # bad unicode char; use getch
                ensure
                  $DEBUG = debug
                end
                next if unicode
              end
              kind = :error
              match = scan(unicode ? /./mu : /./m)

            end

          elsif state == :def_expected
            state = :initial
            if scan(/self\./)
              tokens << ['self', :pre_constant]
              tokens << ['.', :operator]
            end
            if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                      /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
              kind = :method
            else
              # Not a plain method name: rescan from here in :initial state.
              next
            end

          elsif state == :module_expected
            if match = scan(/<</)
              kind = :operator
            else
              state = :initial
              if match = scan(unicode ? /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/uo :
                                        /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/o)
                kind = :class
              else
                next
              end
            end

          elsif state == :undef_expected
            state = :undef_comma_expected
            if match = scan(unicode ? /#{patterns::METHOD_NAME_EX}/uo :
                                      /#{patterns::METHOD_NAME_EX}/o)
              kind = :method
            elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                         /#{patterns::SYMBOL}/o)
              case delim = match[1]
              when ?', ?"
                tokens << [:open, :symbol]
                tokens << [':', :symbol]
                match = delim.chr
                kind = :delimiter
                state = patterns::StringState.new :symbol, delim == ?", match
                # After the quoted symbol, continue looking for ", name".
                state.next_state = :undef_comma_expected
              else
                kind = :symbol
              end
            else
              state = :initial
              next
            end

          elsif state == :alias_expected
            match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                   /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

            if match
              # Emit "new_name", the separating space and "old_name" as
              # three tokens; a leading ":" marks a symbol.
              tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
              tokens << [self[2], :space]
              tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
            end
            state = :initial
            next

          elsif state == :undef_comma_expected
            if match = scan(/,/)
              kind = :operator
              state = :undef_expected
            else
              state = :initial
              next
            end

          end
# }}}

          unless kind == :error
            # Normalize the ":set" markers: true for the next token only if
            # the token just scanned asked for it.
            if value_expected = value_expected == :set
              value_expected = :expect_colon if match == '?' || match == 'when'
            end
            last_token_dot = last_token_dot == :set
          end

          if $CODERAY_DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens, state
          end
          raise_inspect 'Empty token', tokens unless match

          tokens << [match, kind]

          if last_state
            # One token after "#$" / "#@" has been scanned; resume the string.
            state = last_state
            last_state = nil
          end
        end
      end

      # Input ended inside a literal and/or inline blocks: close everything
      # that is still open, innermost first.
      inline_block_stack << [state] if state.is_a? patterns::StringState
      until inline_block_stack.empty?
        this_block = inline_block_stack.pop
        tokens << [:close, :inline] if this_block.size > 1
        state = this_block.first
        tokens << [:close, state.type]
      end

      tokens
    end

  end

end
end

# vim:fdm=marker