165 lines
4.1 KiB
Ruby
165 lines
4.1 KiB
Ruby
module CodeRay
  module Scanners
    # Scanner that tokenizes PHP embedded in a host document.
    #
    # Everything outside a <tt><?php ... ?></tt> section is emitted as
    # +:comment+ tokens; inside a section the scanner emits +:space+,
    # +:comment+, +:operator+, +:ident+ / +:reserved+ / +:pre_constant+,
    # +:hex+, +:oct+, +:integer+, +:float+ and string tokens (+:delimiter+,
    # +:content+, +:char+ wrapped in <tt>[:open, :string]</tt> /
    # <tt>[:close, :string]</tt> markers). Unrecognized characters are
    # emitted one at a time with kind +:error+.
    class PHP < Scanner
      register_for :php

      # Words highlighted as +:reserved+. Lookup is case-sensitive.
      RESERVED_WORDS = [
        'and', 'or', 'xor', '__FILE__', 'exception', '__LINE__', 'array', 'as', 'break', 'case',
        'class', 'const', 'continue', 'declare', 'default',
        'die', 'do', 'echo', 'else', 'elseif',
        'empty', 'enddeclare', 'endfor', 'endforeach', 'endif',
        'endswitch', 'endwhile', 'eval', 'exit', 'extends',
        'for', 'foreach', 'function', 'global', 'if',
        'include', 'include_once', 'isset', 'list', 'new',
        'print', 'require', 'require_once', 'return', 'static',
        'switch', 'unset', 'use', 'var', 'while',
        '__FUNCTION__', '__CLASS__', '__METHOD__', 'final', 'php_user_filter',
        'interface', 'implements', 'public', 'private',
        'protected', 'abstract', 'clone', 'try', 'catch',
        'throw', 'cfunction', 'old_function'
      ].freeze

      # Words highlighted as +:pre_constant+.
      PREDEFINED_CONSTANTS = [
        'null', '$this', 'true', 'false'
      ].freeze

      # Maps an identifier to its token kind; anything not listed is +:ident+.
      IDENT_KIND = WordList.new(:ident).
        add(RESERVED_WORDS, :reserved).
        add(PREDEFINED_CONSTANTS, :pre_constant)

      # Escape-sequence patterns. Not referenced by #scan_tokens in this file;
      # kept so that code referring to these constants keeps working.
      ESCAPE = / [\$\wrbfnrtv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
      UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

      # A one-line comment (<tt>//</tt> or <tt>#</tt>) runs to the end of the
      # line or to the next <tt>?></tt>, whichever comes first; a backslash
      # does not continue it onto the following line. A block comment runs to
      # the first <tt>*/</tt>, or to the end of input when unterminated.
      COMMENT = %r{
        (?: // | \# ) (?: (?! \?> ) [^\n] )*
        |
        /\* (?: .*? \*/ | .* )
      }mx

      # Floating point literal: digits around a decimal point with an optional
      # exponent, or digits followed by a mandatory exponent. Alternatives are
      # ordered so that the longest form is matched as a single token.
      FLOAT = /
        (?: \d+ \. \d* | \. \d+ ) (?: [eE] [+-]? \d+ )?
        |
        \d+ [eE] [+-]? \d+
      /x

      # Tokenizes the scanner's input and appends the result to +tokens+.
      #
      # tokens::  collection receiving <tt>[text, kind]</tt> pairs via <tt><<</tt>
      # options:: accepted for interface compatibility; not used here
      #
      # Returns +tokens+.
      def scan_tokens tokens, options
        state = :waiting_php
        string_type = nil  # the quote character that opened the current string

        until eos?
          kind = :error
          match = nil

          case state
          when :initial
            if scan(/ \s+ | \\\n /x)
              kind = :space

            elsif scan(/\?>/)
              kind = :char
              state = :waiting_php

            elsif scan(COMMENT)
              kind = :comment

            # PHP has no regexp literals, so "/" is always an operator here.
            # "." is left to the float rule when a digit follows (".5").
            elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%] | \.(?!\d) /x)
              kind = :operator

            elsif match = scan(/ [$@A-Za-z_][A-Za-z_0-9]* /x)
              kind = IDENT_KIND[match]

            elsif match = scan(/["']/)
              tokens << [:open, :string]
              string_type = match
              state = :string
              kind = :delimiter

            elsif scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex

            # Floats are tried before integers so that "12.5" is one token.
            elsif scan(FLOAT)
              kind = :float

            # The lookahead stops "0178" from being split into "01" + "78".
            elsif scan(/0[0-7]+(?!\d)/)
              kind = :oct

            elsif scan(/\d+/)
              kind = :integer

            else
              # Unknown character: emit it as a one-character :error token.
              getch
            end

          when :string
            if scan(/[^\\"']+/)
              kind = :content

            elsif scan(/["']/)
              if string_type == matched
                tokens << [matched, :delimiter]
                tokens << [:close, :string]
                state = :initial
                string_type = nil
                next
              else
                # The other quote character is ordinary text in this string.
                kind = :content
              end

            elsif scan(/ \\ (?: \S ) /mox)
              kind = :char

            elsif scan(/\\/)
              # A backslash before whitespace (or at end of input) is plain
              # text; the string stays open.
              kind = :content

            else
              raise "else case \" reached; %p not handled." % peek(1)
            end

          when :waiting_php
            if scan(/<\?php/m)
              kind = :char
              state = :initial
            elsif scan(/[^<]+/)
              kind = :comment
            else
              # A "<" that does not start "<?php".
              kind = :comment
              getch
            end

          else
            raise 'else-case reached'
          end

          match ||= matched
          tokens << [match, kind]
        end

        # Balance the :open marker of a string left unterminated at end of input.
        tokens << [:close, :string] if state == :string

        tokens
      end
    end
  end
end