module CodeRay
module Scanners

  load :java

  # Scanner for Groovy source code.
  #
  # Extends the Java scanner: Java's IDENT_KIND word list is copied and
  # extended with Groovy keywords and the magic variable +it+. On top of
  # that, this scanner handles single-, double- and triple-quoted strings,
  # slash-delimited regexps, and <tt>$ident</tt> / <tt>${...}</tt>
  # interpolation inside them.
  #
  # IDENT is not defined in this class; presumably it is inherited from
  # the Java scanner.
  class Groovy < Java

    include Streamable
    register_for :groovy

    # TODO: Check this!
    GROOVY_KEYWORDS = %w[
      as assert def in
    ]
    # Keywords after which a value is expected. The scanner uses this to
    # decide whether a following "/" opens a regexp or is an operator.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case instanceof new return throw typeof while as assert in
    ]
    GROOVY_MAGIC_VARIABLES = %w[ it ]

    # Token kind lookup for identifiers: Java's table plus Groovy additions.
    IDENT_KIND = Java::IDENT_KIND.dup.
      add(GROOVY_KEYWORDS, :keyword).
      add(GROOVY_MAGIC_VARIABLES, :local_variable)

    # The following patterns match what comes AFTER the backslash.
    ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} /x  # no 4-byte unicode chars? U[a-fA-F0-9]{8}
    REGEXP_ESCAPE =  / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x

    # Maps an opening delimiter to the pattern that matches a run of plain
    # content inside that kind of literal.
    # TODO: interpretation inside ', ", /
    STRING_CONTENT_PATTERN = {
      "'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
      '"' => /[^\\$"\n]+/,
      "'''" => /(?>[^\\']+|'(?!''))+/,
      '"""' => /(?>[^\\$"]+|"(?!""))+/,
      '/' => /[^\\$\/\n]+/,
    }

    # Scans the input and appends tokens to +tokens+.
    #
    # Plain tokens are appended as <tt>[text, kind]</tt> pairs. Strings,
    # regexps and interpolations are wrapped in <tt>[:open, kind]</tt> /
    # <tt>[:close, kind]</tt> pairs. Every group that is opened is also
    # closed, even if the input ends in the middle of a literal or of a
    # <tt>${...}</tt> block.
    #
    # +options+ is accepted for interface compatibility and is not read here.
    #
    # Returns +tokens+.
    def scan_tokens(tokens, options)

      state = :initial
      # Saved [state, string_delimiter, inline_block_paren_depth] triples of
      # the literals that enclose the ${...} block currently being scanned.
      inline_block_stack = []
      # Brace nesting depth inside the current ${...} block.
      inline_block_paren_depth = nil
      string_delimiter = nil
      import_clause = class_name_follows = last_token = after_def = false
      # Truthy when the next token may start a value, so that "/" is read as
      # a regexp opener instead of an operator. Set to :regexp after "~".
      value_expected = true

      # Triple-quoted strings are tracked with the :multiline_string state but
      # their token group is opened as :string, so it must be closed as :string.
      group_kind = lambda { |s| s == :multiline_string ? :string : s }

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            tokens << [match, :space]
            if match.index ?\n
              # A line break ends an import clause and a pending "def".
              import_clause = after_def = false
              value_expected = true unless value_expected
            end
            next

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            value_expected = true
            after_def = false
            kind = :comment

          elsif bol? && scan(/ \#!.* /x)
            kind = :doctype

          # The lookahead rejects only the keyword "as" itself, not identifiers
          # that merely start with "as" (e.g. "import asm.ClassReader").
          elsif import_clause && scan(/ (?!as\b) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
            after_def = value_expected = false
            kind = :include

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            if last_token == '.'
              # Member access: keywords after a dot are ordinary identifiers.
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            elsif after_def && check(/\s*[({]/)
              kind = :method
              after_def = false
            elsif kind == :ident && last_token != '?' && check(/:/)
              # ident followed by ":" is a key, unless it belongs to a ternary.
              kind = :key
            else
              class_name_follows = true if match == 'class' || (import_clause && match == 'as')
              import_clause = match == 'import'
              after_def = true if match == 'def'
            end

          elsif scan(/;/)
            import_clause = after_def = false
            value_expected = true
            kind = :operator

          elsif scan(/\{/)
            class_name_follows = after_def = false
            value_expected = true
            kind = :operator
            unless inline_block_stack.empty?
              inline_block_paren_depth += 1
            end

          # TODO: ~'...', ~"..." and ~/.../ style regexps
          elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
              && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
            value_expected = true
            value_expected = :regexp if match == '~'
            after_def = false
            kind = :operator

          elsif match = scan(/ [)\]}] /x)
            value_expected = after_def = false
            if !inline_block_stack.empty? && match == '}'
              inline_block_paren_depth -= 1
              if inline_block_paren_depth == 0  # closing brace of inline block reached
                tokens << [match, :inline_delimiter]
                tokens << [:close, :inline]
                # Resume scanning the literal that contained the ${...} block.
                state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
                next
              end
            end
            kind = :operator

          elsif check(/[\d.]/)
            after_def = value_expected = false
            if scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex
            elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
              kind = :oct
            elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              kind = :float
            elsif scan(/\d+[lLgG]?/)
              kind = :integer
            end

          elsif match = scan(/'''|"""/)
            after_def = value_expected = false
            state = :multiline_string
            tokens << [:open, :string]
            string_delimiter = match
            kind = :delimiter

          # TODO: record.'name'
          elsif match = scan(/["']/)
            after_def = value_expected = false
            # Only quotes can match here; "/" is handled by the next branch.
            state = :string
            tokens << [:open, state]
            string_delimiter = match
            kind = :delimiter

          elsif value_expected && (match = scan(/\//))
            after_def = value_expected = false
            tokens << [:open, :regexp]
            state = :regexp
            string_delimiter = '/'
            kind = :delimiter

          elsif scan(/ @ #{IDENT} /ox)
            after_def = value_expected = false
            kind = :annotation

          elsif scan(/\//)
            # No value expected here, so "/" is the division operator.
            after_def = false
            value_expected = true
            kind = :operator

          else
            getch
            kind = :error

          end

        when :string, :regexp, :multiline_string
          if scan(STRING_CONTENT_PATTERN[string_delimiter])
            kind = :content

          elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
            tokens << [match, :delimiter]
            if state == :regexp
              # TODO: regexp modifiers? s, m, x, i?
              modifiers = scan(/[ix]+/)
              tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
            end
            tokens << [:close, group_kind.call(state)]
            string_delimiter = nil
            after_def = value_expected = false
            state = :initial
            next

          elsif (state == :string || state == :multiline_string) &&
              (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # In single-quoted strings only \\ and \' are shown as escapes.
            if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
              kind = :content
            else
              kind = :char
            end
          elsif state == :regexp && scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char

          elsif match = scan(/ \$ #{IDENT} /mox)
            # $ident interpolation: a complete inline group of its own.
            tokens << [:open, :inline]
            tokens << ['$', :inline_delimiter]
            match = match[1..-1]
            tokens << [match, IDENT_KIND[match]]
            tokens << [:close, :inline]
            next
          elsif match = scan(/ \$ \{ /x)
            # ${...} interpolation: remember the enclosing literal and scan the
            # block contents as ordinary code until the matching "}".
            tokens << [:open, :inline]
            tokens << ['${', :inline_delimiter]
            inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
            inline_block_paren_depth = 1
            state = :initial
            next

          elsif scan(/ \$ /mx)
            kind = :content

          elsif scan(/ \\. /mx)
            kind = :content

          elsif scan(/ \\ | \n /x)
            # Unterminated literal: close its group and flag the character.
            tokens << [:close, group_kind.call(state)]
            kind = :error
            after_def = value_expected = false
            state = :initial

          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that did not capture the text themselves rely on the
        # scanner's last match.
        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        last_token = match unless [:space, :comment, :doctype].include? kind

        tokens << [match, kind]

      end

      # Input ended inside a literal: close its group.
      if [:multiline_string, :string, :regexp].include? state
        tokens << [:close, group_kind.call(state)]
      end

      # Input ended inside one or more ${...} blocks: close each inline group
      # and the literal that contained it, innermost first.
      until inline_block_stack.empty?
        outer_state, = inline_block_stack.pop
        tokens << [:close, :inline]
        tokens << [:close, group_kind.call(outer_state)]
      end

      tokens
    end

  end

end
end