module CodeRay
module Scanners

  class Java < Scanner

    include Streamable
    register_for :java
    helper :builtin_types
    
    # http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
    KEYWORDS = %w[
      assert break case catch continue default do else
      finally for if instanceof import new package
      return switch throw try typeof while
      debugger export
    ]
    RESERVED = %w[ const goto ]
    CONSTANTS = %w[ false null true ]
    MAGIC_VARIABLES = %w[ this super ]
    TYPES = %w[
      boolean byte char class double enum float int interface long
      short void
    ] << '[]'  # String[] should be highlighted as a type
    DIRECTIVES = %w[
      abstract extends final implements native private protected public
      static strictfp synchronized throws transient volatile
    ]
    
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(RESERVED, :reserved).
      add(CONSTANTS, :pre_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(TYPES, :type).
      add(BuiltinTypes::List, :pre_type).
      add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
      add(DIRECTIVES, :directive)

    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }
    IDENT = /[a-zA-Z_][A-Za-z_0-9]*/
    
    def scan_tokens tokens, options

      state = :initial
      string_delimiter = nil
      import_clause = class_name_follows = last_token_dot = false

      until eos?

        kind = nil
        match = nil
        
        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            tokens << [match, :space]
            next
          
          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            tokens << [match, :comment]
            next
          
          elsif import_clause && scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
            kind = :include
          
          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            if last_token_dot
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            else
              import_clause = true if match == 'import'
              class_name_follows = true if match == 'class' || match == 'interface'
            end
          
          elsif scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
            kind = :operator
          
          elsif scan(/;/)
            import_clause = false
            kind = :operator
          
          elsif scan(/\{/)
            class_name_follows = false
            kind = :operator
          
          elsif check(/[\d.]/)
            if scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex
            elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
              kind = :oct
            elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              kind = :float
            elsif scan(/\d+[lL]?/)
              kind = :integer
            end

          elsif match = scan(/["']/)
            tokens << [:open, :string]
            state = :string
            string_delimiter = match
            kind = :delimiter

          elsif scan(/ @ #{IDENT} /ox)
            kind = :annotation

          else
            getch
            kind = :error

          end

        when :string
          if scan(STRING_CONTENT_PATTERN[string_delimiter])
            kind = :content
          elsif match = scan(/["'\/]/)
            tokens << [match, :delimiter]
            tokens << [:close, state]
            string_delimiter = nil
            state = :initial
            next
          elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
              kind = :content
            else
              kind = :char
            end
          elsif scan(/\\./m)
            kind = :content
          elsif scan(/ \\ | $ /x)
            tokens << [:close, :delimiter]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        match ||= matched
        if $DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match
        
        last_token_dot = match == '.'
        
        tokens << [match, kind]

      end

      if state == :string
        tokens << [:close, state]
      end

      tokens
    end

  end

end
end