Update CodeRay version to 1.0 final (#4264).

git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@7619 e93f8b46-1217-0410-a6f0-8f06a7374b81
This commit is contained in:
Etienne Massip 2011-10-08 13:35:15 +00:00
parent d1efb4f148
commit 8c2ae427fa
28 changed files with 4997 additions and 0 deletions

215
vendor/gems/coderay-1.0.0/bin/coderay vendored Normal file
View File

@ -0,0 +1,215 @@
#!/usr/bin/env ruby
require 'coderay'

# Split the command line in two: switches (-h, -v, or anything containing
# "--word") end up in the global $options, everything else stays in args.
$options, args = ARGV.partition do |arg|
  arg[/^-[hv]$|--\w+/]
end

# The first remaining argument is taken as the subcommand when it starts with a
# word character and does not name an existing file; otherwise there is none.
candidate = args.first
subcommand =
  if /^\w/ === candidate && !File.exist?(candidate)
    candidate
  end

# The subcommand itself is not an input/output argument.
args.delete subcommand
# Tells whether at least one of the given switches is present in the global
# $options list collected from the command line.
def option? *options
  options.any? { |switch| $options.include? switch }
end
# True when standard output is a terminal, or when the --tty switch was given.
def tty?
  return true if $stdout.tty?
  option?('--tty')
end
# Prints a single line with the CodeRay version to standard output.
def version
  puts "CodeRay #{CodeRay::VERSION}\n"
end
# Prints the usage summary to standard output; the first line carries the
# CodeRay version.
def help
  usage_text = <<-HELP
This is CodeRay #{CodeRay::VERSION}, a syntax highlighting tool for selected languages.
usage:
coderay [-language] [input] [-format] [output]
defaults:
language detect from input file name or shebang; fall back to plain text
input STDIN
format detect from output file name or use terminal; fall back to HTML
output STDOUT
common:
coderay file.rb # highlight file to terminal
coderay file.rb > file.html # highlight file to HTML page
coderay file.rb -div > file.html # highlight file to HTML snippet
configure output:
coderay file.py output.json # output tokens as JSON
coderay file.py -loc # count lines of code in Python file
configure input:
coderay -python file # specify the input language
coderay -ruby # take input from STDIN
more:
coderay stylesheet [style] # print CSS stylesheet
  HELP
  puts usage_text
end
# Prints the list of available subcommands to standard output.
def commands
  command_list = <<-COMMANDS
general:
highlight code highlighting (default command, optional)
stylesheet print the CSS stylesheet with the given name (aliases: style, css)
about:
list [of] list all available plugins (or just the scanners|encoders|styles|filetypes)
commands print this list
help show some help
version print CodeRay version
  COMMANDS
  puts command_list
end
# Prints one line per plugin of +plugin_host+ (anything responding to
# +all_plugins+), sorted alphabetically.
#
# Each line shows the plugin id and title, followed by its aliases (written as
# command line switches, shortest first) in parentheses, and a marker if the
# plugin carries the :default alias.
#
# The parenthesised part is only printed when there is at least one alias;
# previously plugins that respond to +file_extension+ but have no aliases were
# listed with an empty "()".
def print_list_of plugin_host
  plugins = plugin_host.all_plugins.map do |plugin|
    info = " #{plugin.plugin_id}: #{plugin.title}"

    # :default is reported separately below, not as a switch.
    aliases = (plugin.aliases - [:default]).map { |key| "-#{key}" }.sort_by { |key| key.size }
    info << " (#{aliases.join(', ')})" unless aliases.empty?

    info << ' <-- default' if plugin.aliases.include? :default
    info
  end
  puts plugins.sort
end
# Informational switches are handled first; they do not stop the dispatch below.
version if option?('-v', '--version')
help if option?('-h', '--help')

case subcommand
when 'highlight', nil
  if ARGV.empty?
    version
    help
  else
    # Describe the argument order as a string: '-' for a dash-prefixed argument
    # (language or format), 'f' for anything else (a file name).
    signature = args.map { |arg| arg[/^-/] ? '-' : 'f' }.join
    names     = args.map { |arg| arg.sub(/^-/, '') }

    case signature
    when /^$/
      # Only switches were given; they have been handled above.
      exit
    when /^ff?$/
      input_file, output_file, = *names
    when /^f-f?$/
      input_file, output_format, output_file, = *names
    when /^-ff?$/
      input_lang, input_file, output_file, = *names
    when /^-f-f?$/
      input_lang, input_file, output_format, output_file, = *names
    when /^--?f?$/
      input_lang, output_format, output_file, = *names
    else
      $stdout = $stderr
      help
      puts
      puts "Unknown parameter order: #{args.join ' '}, expected: [-language] [input] [-format] [output]"
      exit 1
    end

    # Fill in whatever was not given explicitly.
    if input_file
      input_lang ||= CodeRay::FileType.fetch input_file, :text, true
    end

    if output_file
      output_format ||= CodeRay::FileType[output_file]
    else
      output_format ||= :terminal
    end

    # A plain "html" format means a complete page here.
    output_format = :page if output_format.to_s == 'html'

    input = input_file ? File.read(input_file) : $stdin.read

    begin
      file =
        if output_file
          File.open output_file, 'w'
        else
          $stdout.sync = true
          $stdout
        end

      CodeRay.encode(input, input_lang, output_format, :out => file)
      file.puts

    rescue CodeRay::PluginHost::PluginNotFound => boom
      $stdout = $stderr
      if boom.message[/CodeRay::(\w+)s could not load plugin :?(.*?): /]
        puts "I don't know the #$1 \"#$2\"."
      else
        puts boom.message
      end
      # puts "I don't know this plugin: #{boom.message[/Could not load plugin (.*?): /, 1]}."
    rescue CodeRay::Scanners::Scanner::ScanError, Errno::EPIPE
      # These are deliberately ignored. The original code noted that pagers
      # sometimes trigger an error here and asked for Errno::EPIPE (the reader
      # closed the pipe early) to be rescued as well.
    ensure
      # file is nil if File.open itself failed; do not mask that error.
      file.close if output_file && file
    end
  end

when 'li', 'list'
  arg = args.first && args.first.downcase

  if [nil, 's', 'sc', 'scanner', 'scanners'].include? arg
    puts 'input languages (Scanners):'
    print_list_of CodeRay::Scanners
  end

  if [nil, 'e', 'en', 'enc', 'encoder', 'encoders'].include? arg
    puts 'output formats (Encoders):'
    print_list_of CodeRay::Encoders
  end

  if [nil, 'st', 'style', 'styles'].include? arg
    puts 'CSS themes for HTML output (Styles):'
    print_list_of CodeRay::Styles
  end

  if [nil, 'f', 'ft', 'file', 'filetype', 'filetypes'].include? arg
    puts 'recognized file types:'

    # type name => list of extensions (with leading dot) and special file names
    filetypes = Hash.new { |h, k| h[k] = [] }
    CodeRay::FileType::TypeFromExt.each do |ext, type|
      filetypes[type.to_s] << ".#{ext}"
    end
    CodeRay::FileType::TypeFromName.each do |name, type|
      filetypes[type.to_s] << name
    end

    filetypes.sort.each do |type, exts|
      puts " #{type}: #{exts.sort_by { |ext| ext.size }.join(', ')}"
    end
  end

when 'stylesheet', 'style', 'css'
  puts CodeRay::Encoders[:html]::CSS.new(args.first).stylesheet

when 'commands'
  commands

when 'help'
  help

else
  # subcommand is a non-nil string that matched none of the known commands.
  $stdout = $stderr
  help
  puts
  if subcommand[/\A\w+\z/]
    puts "Unknown command: #{subcommand}"
  else
    puts "File not found: #{subcommand}"
  end
  exit 1
end

278
vendor/gems/coderay-1.0.0/lib/coderay.rb vendored Normal file
View File

@ -0,0 +1,278 @@
# encoding: utf-8
# Encoding.default_internal = 'UTF-8'
# = CodeRay Library
#
# CodeRay is a Ruby library for syntax highlighting.
#
# I try to make CodeRay easy to use and intuitive, but at the same time fully
# featured, complete, fast and efficient.
#
# See README.
#
# It consists mainly of
# * the main engine: CodeRay (Scanners::Scanner, Tokens, Encoders::Encoder)
# * the plugin system: PluginHost, Plugin
# * the scanners in CodeRay::Scanners
# * the encoders in CodeRay::Encoders
# * the styles in CodeRay::Styles
#
# Here's a fancy graphic to light up this gray docu:
#
# http://cycnus.de/raindark/coderay/scheme.png
#
# == Documentation
#
# See CodeRay, Encoders, Scanners, Tokens.
#
# == Usage
#
# Remember you need RubyGems to use CodeRay, unless you have it in your load
# path. Run Ruby with -rubygems option if required.
#
# === Highlight Ruby code in a string as html
#
# require 'coderay'
# print CodeRay.scan('puts "Hello, world!"', :ruby).html
#
# # prints something like this:
# puts <span class="s">&quot;Hello, world!&quot;</span>
#
#
# === Highlight C code from a file in a html div
#
# require 'coderay'
# print CodeRay.scan(File.read('ruby.h'), :c).div
# print CodeRay.scan_file('ruby.h').html.div
#
# You can include this div in your page. The used CSS styles can be printed with
#
# % coderay stylesheet
#
# === Highlight without typing too much
#
# If you are one of the hasty (or lazy, or extremely curious) people, just run this file:
#
# % ruby -rubygems /path/to/coderay/coderay.rb > example.html
#
# and look at the file it created in your browser.
#
# = CodeRay Module
#
# The CodeRay module provides convenience methods for the engine.
#
# * The +lang+ and +format+ arguments select Scanner and Encoder to use. These are
# simply lower-case symbols, like <tt>:python</tt> or <tt>:html</tt>.
# * All methods take an optional hash as last parameter, +options+, that is sent to
# the Encoder / Scanner.
# * Input and language are always sorted in this order: +code+, +lang+.
# (This is in alphabetical order, if you need a mnemonic ;)
#
# You should be able to highlight everything you want just using these methods;
# so there is no need to dive into CodeRay's deep class hierarchy.
#
# The examples in the demo directory demonstrate common cases using this interface.
#
# = Basic Access Ways
#
# Read this to get a general view what CodeRay provides.
#
# == Scanning
#
# Scanning means analysing an input string, splitting it up into Tokens.
# Each Token knows about what type it is: string, comment, class name, etc.
#
# Each +lang+ (language) has its own Scanner; for example, <tt>:ruby</tt> code is
# handled by CodeRay::Scanners::Ruby.
#
# CodeRay.scan:: Scan a string in a given language into Tokens.
# This is the most common method to use.
# CodeRay.scan_file:: Scan a file and guess the language using FileType.
#
# The Tokens object you get from these methods can encode itself; see Tokens.
#
# == Encoding
#
# Encoding means compiling Tokens into an output. This can be colored HTML or
# LaTeX, a textual statistic or just the number of non-whitespace tokens.
#
# Each Encoder provides output in a specific +format+, so you select Encoders via
# formats like <tt>:html</tt> or <tt>:statistic</tt>.
#
# CodeRay.encode:: Scan and encode a string in a given language.
# CodeRay.encode_tokens:: Encode the given tokens.
# CodeRay.encode_file:: Scan a file, guess the language using FileType and encode it.
#
# == All-in-One Encoding
#
# CodeRay.encode:: Highlight a string with a given input and output format.
#
# == Instantiating
#
# You can use an Encoder instance to highlight multiple inputs. This way, the setup
# for this Encoder must only be done once.
#
# CodeRay.encoder:: Create an Encoder instance with format and options.
# CodeRay.scanner:: Create a Scanner instance for lang, with '' as default code.
#
# To make use of CodeRay.scanner, use CodeRay::Scanner::code=.
#
# The scanning methods provide more flexibility; we recommend to use these.
#
# == Reusing Scanners and Encoders
#
# If you want to re-use scanners and encoders (because that is faster), see
# CodeRay::Duo for the most convenient (and recommended) interface.
module CodeRay

  $CODERAY_DEBUG ||= false

  require 'coderay/version'

  # helpers
  autoload :FileType, 'coderay/helpers/file_type'

  # Tokens
  autoload :Tokens, 'coderay/tokens'
  autoload :TokensProxy, 'coderay/tokens_proxy'
  autoload :TokenKinds, 'coderay/token_kinds'

  # Plugin system
  autoload :PluginHost, 'coderay/helpers/plugin'
  autoload :Plugin, 'coderay/helpers/plugin'

  # Plugins
  autoload :Scanners, 'coderay/scanner'
  autoload :Encoders, 'coderay/encoder'
  autoload :Styles, 'coderay/style'

  # Convenience access and reusable Encoder/Scanner pair
  autoload :Duo, 'coderay/duo'

  class << self

    # Scans +code+ (a String) in the language +lang+.
    #
    # Returns a TokensProxy built from the code, the language, the options and
    # the given block.
    #
    # Example:
    #  require 'coderay'
    #  page = CodeRay.scan("puts 'Hello, world!'", :ruby).html
    #
    # See also demo/demo_simple.
    def scan(code, lang, options = {}, &block)
      # FIXME: return a proxy for direct-stream encoding
      TokensProxy.new(code, lang, options, block)
    end

    # Reads the file +filename+ and scans its content via CodeRay.scan.
    #
    # When +lang+ is :auto (the default), the language is looked up with
    # CodeRay::FileType, falling back to :text.
    #
    # Example:
    #  require 'coderay'
    #  page = CodeRay.scan_file('some_c_code.c').html
    def scan_file(filename, lang = :auto, options = {}, &block)
      if lang == :auto
        lang = FileType.fetch(filename, :text, true)
      end
      source = File.read(filename)
      scan(source, lang, options, &block)
    end

    # Scans +code+ in the language +lang+ and encodes it to +format+ in one go.
    #
    # +options+ is handed both to the Encoder constructor and to its +encode+
    # call.
    #
    # See CodeRay::Encoder.encode.
    def encode(code, lang, format, options = {})
      encoder(format, options).encode(code, lang, options)
    end

    # Encodes already scanned +tokens+ to +format+.
    # Meant to be combined with CodeRay.scan:
    #
    #  require 'coderay'
    #
    #  # Highlight a short Ruby code example in a HTML span
    #  tokens = CodeRay.scan '1 + 2', :ruby
    #  puts CodeRay.encode_tokens(tokens, :span)
    #
    def encode_tokens(tokens, format, options = {})
      encoder(format, options).encode_tokens(tokens, options)
    end

    # Scans the file +filename+ (language detected automatically, see
    # CodeRay.scan_file) and encodes the result to +format+.
    #
    # Note that the second argument is the output +format+, not the input
    # language. Scanner options are taken from <tt>options[:scanner_options]</tt>.
    #
    # Example:
    #  require 'coderay'
    #  page = CodeRay.encode_file 'some_c_code.c', :html
    def encode_file(filename, format, options = {})
      scanner_options = get_scanner_options(options)
      tokens = scan_file(filename, :auto, scanner_options)
      encode_tokens(tokens, format, options)
    end

    # Encodes +code+ with the +format+ encoder, which defaults to :div (a HTML
    # <div>).
    #
    # The default options select CSS classes, so a stylesheet has to be
    # included in the output.
    #
    # See encode.
    def highlight(code, lang, options = { :css => :class }, format = :div)
      encode(code, lang, format, options)
    end

    # Like highlight, but for the file +filename+.
    #
    # The default options select CSS classes, so a stylesheet has to be
    # included in the output.
    #
    # See encode_file.
    def highlight_file(filename, options = { :css => :class }, format = :div)
      encode_file(filename, format, options)
    end

    # Looks up the Encoder class for +format+ and instantiates it with
    # +options+.
    #
    # Example:
    #  require 'coderay'
    #
    #  stats = CodeRay.encoder(:statistic)
    #  stats.encode("puts 17 + 4\n", :ruby)
    #
    #  puts '%d out of %d tokens have the kind :integer.' % [
    #    stats.type_stats[:integer].count,
    #    stats.real_token_count
    #  ]
    #  #-> 2 out of 4 tokens have the kind :integer.
    def encoder(format, options = {})
      Encoders[format].new(options)
    end

    # Looks up the Scanner class for +lang+ and instantiates it with an empty
    # code string, +options+ and the given block.
    #
    # See Scanner.new.
    def scanner(lang, options = {}, &block)
      Scanners[lang].new('', options, &block)
    end

    # Returns <tt>options[:scanner_options]</tt>, or an empty Hash if that key
    # is not set.
    #
    # Used by methods like CodeRay.encode_file that take options for the
    # Encoder _and_ the Scanner in a single hash.
    def get_scanner_options(options)
      options.fetch(:scanner_options, {})
    end

  end

end

View File

@ -0,0 +1,65 @@
module CodeRay
module Encoders

  class HTML

    # Holds the stylesheet text of a CodeRay style and an index of its token
    # colour rules, so that the inline style for a given class path can be
    # looked up with get_style.
    class CSS  # :nodoc:

      # The complete stylesheet text built in initialize.
      attr_reader :stylesheet

      # Fetches the style plugin named +style+ from CodeRay::Styles.
      def CSS.load_stylesheet style = nil
        CodeRay::Styles[style]
      end

      # Loads +style+, builds the stylesheet text (main styles followed by the
      # token colours, every non-empty line of which is prefixed with
      # ".CodeRay ") and indexes the token colours for get_style.
      def initialize style = :default
        @classes = Hash.new
        style = CSS.load_stylesheet style
        @stylesheet = [
          style::CSS_MAIN_STYLES,
          style::TOKEN_COLORS.gsub(/^(?!$)/, '.CodeRay ')
        ].join("\n")
        parse style::TOKEN_COLORS
      end

      # Returns the CSS declarations for a class path.
      #
      # +styles+ is an Array of class names; the first entry is the class being
      # styled, the remaining entries are its context. Progressively shorter
      # tails of the context are tried (ending with the empty context) and the
      # first rule found wins.
      #
      # Always returns a String: '' if the class is unknown or no rule matches
      # any of the tried contexts (the latter case used to return nil).
      def get_style styles
        rules = @classes[styles.first]
        return '' unless rules
        1.upto styles.size do |offset|
          style = rules[styles[offset .. -1]]
          return style if style
        end
        # warn 'Style not found: %p' % [styles]
        ''
      end

    private

      # Matches either one rule ("<selectors> { <style> }") or, failing that,
      # the rest of the line, which is then reported as an error by parse.
      CSS_CLASS_PATTERN = /
        (  # $1 = selectors
          (?:
            (?: \s* \. [-\w]+ )+
            \s* ,?
          )+
        )
        \s* \{ \s*
        ( [^\}]+ )?  # $2 = style
        \s* \} \s*
      |
        ( [^\n]+ )  # $3 = error
      /mx

      # Fills @classes from +stylesheet+: for every selector, the last class
      # name is the key, and the preceding class names (as an Array) map to the
      # rule's declarations with spaces and the trailing semicolon removed.
      #
      # Raises a RuntimeError on text that is not a recognised rule.
      def parse stylesheet
        stylesheet.scan CSS_CLASS_PATTERN do |selectors, style, error|
          raise "CSS parse error: '#{error.inspect}' not recognized" if error
          selectors.split(',').each do |selector|
            classes = selector.scan(/[-\w]+/)
            cl = classes.pop
            @classes[cl] ||= Hash.new
            @classes[cl][classes] = style.to_s.strip.delete(' ').chomp(';')
          end
        end
      end

    end
  end

end
end

View File

@ -0,0 +1,115 @@
module CodeRay
module Encoders

  class HTML

    module Numbering  # :nodoc:

      # Adds line numbers to +output+ (the HTML produced by the encoder),
      # modifying it in place, and returns it.
      #
      # mode::    :inline puts a numbered span in front of every line;
      #           :table wraps the output in the line numbers table template;
      #           nil or false leaves +output+ untouched.
      # options:: merged over DEFAULT_OPTIONS; the keys read here are
      #           :line_number_start (Integer), :line_number_anchors (true, or a
      #           prefix for anchor names), :bold_every (Integer or false) and
      #           :highlight_lines (an Enumerable of line numbers).
      #
      # Raises ArgumentError for invalid option values or an unknown mode, and
      # NotImplementedError for the removed :list mode.
      #
      # NOTE(review): in :table mode +output+ must respond to wrap_in! and
      # wrapped_in= (presumably it has been extended with HTML::Output).
      def self.number! output, mode = :table, options = {}
        # Used to return the Numbering module itself here; every other path
        # returns the output.
        return output unless mode

        options = DEFAULT_OPTIONS.merge options

        start = options[:line_number_start]
        unless start.is_a? Integer
          raise ArgumentError, "Invalid value %p for :line_number_start; Integer expected." % [start]
        end

        anchor_prefix = options[:line_number_anchors]
        anchor_prefix = 'line' if anchor_prefix == true
        # Only the first run of word characters is used as the prefix.
        anchor_prefix = anchor_prefix.to_s[/\w+/] if anchor_prefix
        anchoring =
          if anchor_prefix
            proc do |line|
              line = line.to_s
              anchor = anchor_prefix + line
              "<a href=\"##{anchor}\" name=\"#{anchor}\">#{line}</a>"
            end
          else
            proc { |line| line.to_s }  # :to_s.to_proc in Ruby 1.8.7+
          end

        bold_every = options[:bold_every]
        highlight_lines = options[:highlight_lines]
        # Note that :highlight_lines takes precedence over :bold_every.
        bolding =
          if bold_every == false && highlight_lines == nil
            anchoring
          elsif highlight_lines.is_a? Enumerable
            highlight_lines = highlight_lines.to_set
            proc do |line|
              if highlight_lines.include? line
                "<strong class=\"highlighted\">#{anchoring[line]}</strong>"  # highlighted line numbers in bold
              else
                anchoring[line]
              end
            end
          elsif bold_every.is_a? Integer
            raise ArgumentError, ":bold_every can't be 0." if bold_every == 0
            proc do |line|
              if line % bold_every == 0
                "<strong>#{anchoring[line]}</strong>"  # every bold_every-th number in bold
              else
                anchoring[line]
              end
            end
          else
            raise ArgumentError, 'Invalid value %p for :bold_every; false or Integer expected.' % [bold_every]
          end

        # Count the lines: one per newline, plus one for a last line that is
        # not newline-terminated (closing </span> tags after the final newline
        # do not count as a line).
        line_count = output.count("\n")
        position_of_last_newline = output.rindex(RUBY_VERSION >= '1.9' ? /\n/ : ?\n)
        if position_of_last_newline
          after_last_newline = output[position_of_last_newline + 1 .. -1]
          ends_with_newline = after_last_newline[/\A(?:<\/span>)*\z/]
          line_count += 1 unless ends_with_newline
        else
          # No newline at all: the output is a single line. Without this, such
          # output got no line number in :table mode.
          line_count = 1
        end

        case mode
        when :inline
          max_width = (start + line_count).to_s.size
          line_number = start
          # Opening <span> tags that are still open at the end of a line; they
          # are closed before the line break and re-opened on the next line.
          nesting = []
          output.gsub!(/^.*$\n?/) do |line|
            line.chomp!
            reopened = nesting.join
            line.scan(%r!<(/)?span[^>]*>?!) do |closing,|
              if closing
                nesting.pop
              else
                nesting << $&
              end
            end
            closers = '</span>' * nesting.size

            line_number_text = bolding.call line_number
            indent = ' ' * (max_width - line_number.to_s.size)  # TODO: Optimize (10^x)
            line_number += 1
            "<span class=\"line-numbers\">#{indent}#{line_number_text}</span>#{reopened}#{line}#{closers}\n"
          end

        when :table
          line_numbers = (start ... start + line_count).map(&bolding).join("\n")
          line_numbers << "\n"
          line_numbers_table_template = Output::TABLE.apply('LINE_NUMBERS', line_numbers)

          output.gsub!(/<\/div>\n/, '</div>')
          output.wrap_in! line_numbers_table_template
          output.wrapped_in = :div

        when :list
          raise NotImplementedError, 'The :list option is no longer available. Use :table.'

        else
          raise ArgumentError, 'Unknown value %p for mode: expected one of %p' %
            [mode, [:table, :inline]]
        end

        output
      end

    end

  end

end
end

View File

@ -0,0 +1,158 @@
module CodeRay
module Encoders

  class HTML

    # This module is included in the output String of the HTML Encoder.
    #
    # It provides methods like wrap, div, page etc.
    #
    # Remember to use #clone instead of #dup to keep the modules the object was
    # extended with.
    #
    # TODO: Rewrite this without monkey patching.
    module Output

      # The CSS object (responding to +stylesheet+) used for page output and
      # for #stylesheet.
      attr_accessor :css

      class << self

        # Warns if an object that doesn't respond to to_str is extended by
        # Output, to prevent users from misuse. Use Module#remove_method to
        # disable.
        def extended o  # :nodoc:
          warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str
        end

        # Returns the stylesheet text of +css+; with +in_tag+ it is wrapped in
        # a <style> element.
        #
        # The wrapped form used a non-interpolating heredoc before, so the
        # literal text "#{sheet}" was emitted instead of the stylesheet.
        def make_stylesheet css, in_tag = false  # :nodoc:
          sheet = css.stylesheet
          return sheet unless in_tag
          "<style type=\"text/css\">\n#{sheet}\n</style>\n"
        end

        # Returns the PAGE template with the stylesheet of +css+ filled in.
        def page_template_for_css css  # :nodoc:
          sheet = make_stylesheet css
          PAGE.apply 'CSS', sheet
        end

      end

      # True if the string is currently wrapped in +element+ (nil = unwrapped).
      def wrapped_in? element
        wrapped_in == element
      end

      # The element the string is currently wrapped in, or nil.
      def wrapped_in
        @wrapped_in ||= nil
      end
      attr_writer :wrapped_in

      # Wraps the string (in place) in +template+, at its <%CONTENT%> target.
      def wrap_in! template
        Template.wrap! self, template, 'CONTENT'
        self
      end

      # Inserts +title+ into an empty <title></title> element, in place.
      # NOTE(review): +title+ is inserted as given, without HTML escaping.
      def apply_title! title
        self.sub!(/(<title>)(<\/title>)/) { $1 + title + $2 }
        self
      end

      # Wraps the string in place in +element+ (:div, :span or :page) and
      # remembers that in wrapped_in. Returns self.
      #
      # Nothing happens if +element+ is nil/false or equals the current
      # wrapping. :page wraps unwrapped content in a :div first; a Hash given
      # as extra argument may carry a :title for the page.
      #
      # Raises a RuntimeError for an unknown element, or when the current
      # wrapping cannot be wrapped in +element+.
      def wrap! element, *args
        return self if not element or element == wrapped_in
        case element
        when :div
          raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
          wrap_in! DIV
        when :span
          raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
          wrap_in! SPAN
        when :page
          wrap! :div if wrapped_in? nil
          raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div
          wrap_in! Output.page_template_for_css(@css)
          if args.first.is_a?(Hash) && title = args.first[:title]
            apply_title! title
          end
        else
          raise "Unknown value %p for :wrap" % element
        end
        @wrapped_in = element
        self
      end

      # The stylesheet text of #css, optionally wrapped in a <style> element.
      def stylesheet in_tag = false
        Output.make_stylesheet @css, in_tag
      end

      #-- don't include the templates in docu

      # A String containing <%NAME%> placeholders.
      class Template < String  # :nodoc:

        # Wraps +str+ in place in +template+: the template text before the
        # <%target%> placeholder is prepended, the text after it is appended.
        # Raises a RuntimeError if the placeholder is missing.
        def self.wrap! str, template, target
          placeholder = Regexp.new(Regexp.escape("<%#{target}%>"))
          found = placeholder.match(template)
          raise "Template target <%%%s%%> not found" % target unless found
          str[0,0] = found.pre_match
          str << found.post_match
        end

        # Returns a new Template with the <%target%> placeholder replaced by
        # +replacement+. Raises a RuntimeError if the placeholder is missing.
        def apply target, replacement
          placeholder = Regexp.new(Regexp.escape("<%#{target}%>"))
          found = placeholder.match(self)
          raise "Template target <%%%s%%> not found" % target unless found
          Template.new(found.pre_match + replacement + found.post_match)
        end

      end

      SPAN = Template.new '<span class="CodeRay"><%CONTENT%></span>'

      DIV = Template.new <<-DIV
<div class="CodeRay">
<div class="code"><pre><%CONTENT%></pre></div>
</div>
      DIV

      TABLE = Template.new <<-TABLE
<table class="CodeRay"><tr>
<td class="line-numbers" title="double click to toggle" ondblclick="with (this.firstChild.style) { display = (display == '') ? 'none' : '' }"><pre><%LINE_NUMBERS%></pre></td>
<td class="code"><pre><%CONTENT%></pre></td>
</tr></table>
      TABLE

      PAGE = Template.new <<-PAGE
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title></title>
<style type="text/css">
.CodeRay .line-numbers a {
text-decoration: inherit;
color: inherit;
}
<%CSS%>
</style>
</head>
<body style="background-color: white;">
<%CONTENT%>
</body>
</html>
      PAGE

    end

  end

end
end

View File

@ -0,0 +1,24 @@
module CodeRay
module Scanners

  # Alternative language names, each pointing to the id of the scanner that
  # handles it.
  map(
    :'c++'       => :cpp,
    :cplusplus   => :cpp,
    :ecmascript  => :java_script,
    :ecma_script => :java_script,
    :rhtml       => :erb,
    :eruby       => :erb,
    :irb         => :ruby,
    :javascript  => :java_script,
    :js          => :java_script,
    :pascal      => :delphi,
    :patch       => :diff,
    :plain       => :text,
    :plaintext   => :text,
    :xhtml       => :html,
    :yml         => :yaml
  )

  # Declares :text as the default scanner.
  default :text

end
end

View File

@ -0,0 +1,189 @@
module CodeRay
module Scanners
# Scanner for C.
#
# Registered for the language id :c with the file extension 'c'. The work is
# done by scan_tokens, a small state machine that feeds tokens to an encoder.
class C < Scanner
register_for :c
file_extension 'c'
# Words reported with the token kind :keyword.
KEYWORDS = [
'asm', 'break', 'case', 'continue', 'default', 'do',
'else', 'enum', 'for', 'goto', 'if', 'return',
'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
'restrict', # added in C99
] # :nodoc:
# Words reported with the token kind :predefined_type.
PREDEFINED_TYPES = [
'int', 'long', 'short', 'char',
'signed', 'unsigned', 'float', 'double',
'bool', 'complex', # added in C99
] # :nodoc:
# Words reported with the token kind :predefined_constant.
PREDEFINED_CONSTANTS = [
'EOF', 'NULL',
'true', 'false', # added in C99
] # :nodoc:
# Words reported with the token kind :directive.
DIRECTIVES = [
'auto', 'extern', 'register', 'static', 'void',
'const', 'volatile', # added in C89
'inline', # added in C99
] # :nodoc:
# Maps an identifier to its token kind; anything not listed is :ident.
IDENT_KIND = WordList.new(:ident).
add(KEYWORDS, :keyword).
add(PREDEFINED_TYPES, :predefined_type).
add(DIRECTIVES, :directive).
add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc:
# What may follow a backslash in a character or string literal.
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
# \uXXXX and \UXXXXXXXX escapes; only accepted inside strings below.
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
protected
# Tokenizes the input until its end and sends the tokens to +encoder+,
# which is returned.
#
# States:
# :initial:: regular code
# :string:: inside a double-quoted string literal (a :string group is open)
# :include_expected:: right after an #include directive
#
# +options+ is not used by this method.
def scan_tokens encoder, options
state = :initial
# True where an identifier followed by a single ':' counts as a label.
label_expected = true
# True after the keywords 'case' / 'default', until the next operator.
case_expected = false
# label_expected as it was before the current preprocessor line began.
label_expected_before_preproc_line = nil
in_preproc_line = false
until eos?
case state
when :initial
# NOTE: the order of the branches matters; the first pattern that
# matches at the current position wins.
if match = scan(/ \s+ | \\\n /x)
# Whitespace containing a newline ends a preprocessor line, unless
# the match is a backslash-newline continuation.
if in_preproc_line && match != "\\\n" && match.index(?\n)
in_preproc_line = false
label_expected = label_expected_before_preproc_line
end
encoder.text_token match, :space
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
# Line comment (may be continued with backslashes) or block comment;
# an unterminated block comment runs to the end of the input.
encoder.text_token match, :comment
elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
# A label may follow if the operator run contains ';', '{' or '}' ...
label_expected = match =~ /[;\{\}]/
if case_expected
# ... or after the ':' that ends a case / default.
label_expected = true if match == ':'
case_expected = false
end
encoder.text_token match, :operator
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
kind = IDENT_KIND[match]
if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
# The ':' just scanned becomes part of the label token.
kind = :label
match << matched
else
label_expected = false
if kind == :keyword
case match
when 'case', 'default'
case_expected = true
end
end
end
encoder.text_token match, kind
elsif match = scan(/L?"/)
# Start of a string literal; an L prefix is emitted as its own
# :modifier token.
encoder.begin_group :string
if match[0] == ?L
encoder.text_token 'L', :modifier
match = '"'
end
encoder.text_token match, :delimiter
state = :string
elsif match = scan(/ \# \s* if \s* 0 /x)
# "#if 0" blocks are emitted as one comment, up to and including
# the next line starting with #elif, #else or #endif (or up to the
# end of the input).
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
encoder.text_token match, :comment
elsif match = scan(/#[ \t]*(\w*)/)
# Any other preprocessor directive; self[1] is the directive name.
encoder.text_token match, :preprocessor
in_preproc_line = true
label_expected_before_preproc_line = label_expected
state = :include_expected if self[1] == 'include'
elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
# Character literal; content and closing quote are optional in the
# pattern.
label_expected = false
encoder.text_token match, :char
elsif match = scan(/\$/)
encoder.text_token match, :ident
elsif match = scan(/0[xX][0-9A-Fa-f]+/)
label_expected = false
encoder.text_token match, :hex
elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/)
label_expected = false
encoder.text_token match, :octal
elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/)
label_expected = false
encoder.text_token match, :integer
elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
# NOTE(review): the first alternative matches a single digit, so a
# literal such as 1.5 appears to be emitted as more than one token
# (all of kind :integer / :float) - confirm whether that is intended.
label_expected = false
encoder.text_token match, :float
else
# Nothing matched: emit one character as an error token.
encoder.text_token getch, :error
end
when :string
if match = scan(/[^\\\n"]+/)
encoder.text_token match, :content
elsif match = scan(/"/)
encoder.text_token match, :delimiter
encoder.end_group :string
state = :initial
label_expected = false
elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
encoder.text_token match, :char
elsif match = scan(/ \\ | $ /x)
# A lone backslash or the end of the line: the string is closed
# and the match is reported as an error.
encoder.end_group :string
encoder.text_token match, :error
state = :initial
label_expected = false
else
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
end
when :include_expected
# Expect <file> or "file" after #include; anything other than
# whitespace without a newline returns to the :initial state.
if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
encoder.text_token match, :include
state = :initial
elsif match = scan(/\s+/)
encoder.text_token match, :space
state = :initial if match.index ?\n
else
state = :initial
end
else
raise_inspect 'Unknown state', encoder
end
end
# Close a string group left open at the end of the input.
if state == :string
encoder.end_group :string
end
encoder
end
end
end
end

View File

@ -0,0 +1,217 @@
# encoding: utf-8
module CodeRay
module Scanners
# Clojure scanner by Licenser.
class Clojure < Scanner
register_for :clojure
file_extension 'clj'
SPECIAL_FORMS = %w[
def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
new
] # :nodoc:
CORE_FORMS = %w[
+ - -> ->> .. / * <= < = == >= > accessor aclone add-classpath add-watch
agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
assert assoc assoc! assoc-in associative? atom await await-for bases bean
bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
char-array char-escape-string char-name-string char? chars class class?
clear-agent-errors clojure-version coll? comment commute comp comparator
compare compare-and-set! compile complement concat cond condp conj conj!
cons constantly construct-proxy contains? count counted? create-ns
create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
deliver denominator deref derive descendants disj disj! dissoc dissoc!
distinct distinct? doall doc dorun doseq dosync dotimes doto double
double-array doubles drop drop-last drop-while empty empty? ensure
enumeration-seq error-handler error-mode eval even? every? extend
extend-protocol extend-type extenders extends? false? ffirst file-seq
filter find find-doc find-ns find-var first float float-array float?
floats flush fn fn? fnext for force format future future-call future-cancel
future-cancelled? future-done? future? gen-class gen-interface gensym get
get-in get-method get-proxy-class get-thread-bindings get-validator hash
hash-map hash-set identical? identity if-let if-not ifn? import in-ns
inc init-proxy instance? int int-array integer? interleave intern
interpose into into-array ints io! isa? iterate iterator-seq juxt key
keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
list? load load-file load-reader load-string loaded-libs locking long
long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
map map? mapcat max max-key memfn memoize merge merge-with meta methods
min min-key mod name namespace neg? newline next nfirst nil? nnext not
not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
nthnext num number? numerator object-array odd? or parents partial
partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
pos? pr pr-str prefer-method prefers print print-namespace-doc
print-str printf println println-str prn prn-str promise proxy
proxy-mappings proxy-super push-thread-bindings pvalues quot rand
rand-int range ratio? rationalize re-find re-groups re-matcher
re-matches re-pattern re-seq read read-line read-string reduce ref
ref-history-count ref-max-history ref-min-history ref-set refer
refer-clojure reify release-pending-sends rem remove remove-all-methods
remove-method remove-ns remove-watch repeat repeatedly replace replicate
require reset! reset-meta! resolve rest restart-agent resultset-seq
reverse reversible? rseq rsubseq satisfies? second select-keys send
send-off seq seq? seque sequence sequential? set set-error-handler!
set-error-mode! set-validator! set? short short-array shorts
shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
split-at split-with str string? struct struct-map subs subseq subvec
supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
take-nth take-while test the-ns thread-bound? time to-array to-array-2d
trampoline transient tree-seq true? type unchecked-add unchecked-dec
unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
unchecked-remainder unchecked-subtract underive update-in update-proxy
use val vals var-get var-set var? vary-meta vec vector vector-of vector?
when when-first when-let when-not while with-bindings with-bindings*
with-in-str with-local-vars with-meta with-open with-out-str
with-precision xml-seq zero? zipmap
] # :nodoc:
PREDEFINED_CONSTANTS = %w[
true false nil *1 *2 *3 *agent* *clojure-version* *command-line-args*
*compile-files* *compile-path* *e *err* *file* *flush-on-newline*
*in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
*print-readably* *read-eval* *warn-on-reflection*
] # :nodoc:
IDENT_KIND = WordList.new(:ident).
add(SPECIAL_FORMS, :keyword).
add(CORE_FORMS, :keyword).
add(PREDEFINED_CONSTANTS, :predefined_constant)
KEYWORD_NEXT_TOKEN_KIND = WordList.new(nil).
add(%w[ def defn defn- definline defmacro defmulti defmethod defstruct defonce declare ], :function).
add(%w[ ns ], :namespace).
add(%w[ defprotocol defrecord ], :class)
BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=]=?[a-zA-Z0-9$&*+!\/_?<>\-\#]*/
IDENTIFIER = /(?!-\d)(?:(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
SYMBOL = /::?#{IDENTIFIER}/o
DIGIT = /\d/
DIGIT10 = DIGIT
DIGIT16 = /[0-9a-f]/i
DIGIT8 = /[0-7]/
DIGIT2 = /[01]/
RADIX16 = /\#x/i
RADIX8 = /\#o/i
RADIX2 = /\#b/i
RADIX10 = /\#d/i
EXACTNESS = /#i|#e/i
SIGN = /[\+-]?/
EXP_MARK = /[esfdl]/i
EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
SUFFIX = /#{EXP}?/
PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
UINT10 = /#{DIGIT10}+#*/
UINT16 = /#{DIGIT16}+#*/
UINT8 = /#{DIGIT8}+#*/
UINT2 = /#{DIGIT2}+#*/
DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
REAL10 = /#{SIGN}#{UREAL10}/
REAL16 = /#{SIGN}#{UREAL16}/
REAL8 = /#{SIGN}#{UREAL8}/
REAL2 = /#{SIGN}#{UREAL2}/
IMAG10 = /i|#{UREAL10}i/
IMAG16 = /i|#{UREAL16}i/
IMAG8 = /i|#{UREAL8}i/
IMAG2 = /i|#{UREAL2}i/
COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
NUM10 = /#{PREFIX10}?#{COMPLEX10}/
NUM16 = /#{PREFIX16}#{COMPLEX16}/
NUM8 = /#{PREFIX8}#{COMPLEX8}/
NUM2 = /#{PREFIX2}#{COMPLEX2}/
NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
protected
# Tokenizes the remaining input and feeds the tokens to +encoder+.
#
# The scanner is either in :initial mode (ordinary code) or inside a string
# or regexp literal. A literal still open at end of input is closed before
# returning. +options+ is not used. Returns +encoder+.
def scan_tokens encoder, options
  mode = :initial

  until eos?
    if mode == :initial
      # The order of the branches matters: the first pattern that matches wins.
      if (text = scan(/ \s+ | \\\n | , /x))
        encoder.text_token text, :space
      elsif (text = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|[@\^]/))
        encoder.text_token text, :operator
      elsif (text = scan(/;.*/))
        encoder.text_token text, :comment  # TODO: recognize (comment ...) too
      elsif (text = scan(/\#?\\(?:newline|space|.?)/))
        encoder.text_token text, :char
      elsif (text = scan(/\#[ft]/))
        encoder.text_token text, :predefined_constant
      elsif (text = scan(/#{IDENTIFIER}/o))
        word_kind = IDENT_KIND[text]
        encoder.text_token text, word_kind
        # Some keywords determine the kind of the identifier that follows them.
        follower_kind =
          if rest? && word_kind == :keyword
            KEYWORD_NEXT_TOKEN_KIND[text]
          end
        if follower_kind
          gap = scan(/\s+/o)
          encoder.text_token gap, :space if gap
          name = scan(/#{IDENTIFIER}/o)
          encoder.text_token name, follower_kind if name
        end
      elsif (text = scan(/#{SYMBOL}/o))
        encoder.text_token text, :symbol
      elsif (text = scan(/\./))
        encoder.text_token text, :operator
      elsif (text = scan(/ \# \^ #{IDENTIFIER} /ox))
        encoder.text_token text, :type
      elsif (text = scan(/ (\#)? " /x))
        # A leading '#' turns the string literal into a regexp literal.
        mode = self[1] ? :regexp : :string
        encoder.begin_group mode
        encoder.text_token text, :delimiter
      elsif (text = scan(/#{NUM}/o)) && !matched.empty?
        number_kind = text[/[.e\/]/i] ? :float : :integer
        encoder.text_token text, number_kind
      else
        encoder.text_token getch, :error
      end

    elsif mode == :string || mode == :regexp
      if (text = scan(/[^"\\]+|\\.?/))
        encoder.text_token text, :content
      elsif (text = scan(/"/))
        encoder.text_token text, :delimiter
        encoder.end_group mode
        mode = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1),
          encoder, mode
      end

    else
      raise 'else case reached'

    end
  end

  # Close a literal that was left open at end of input.
  encoder.end_group mode if mode == :string || mode == :regexp

  encoder
end
end
end
end

View File

@ -0,0 +1,215 @@
module CodeRay
module Scanners

  # Scanner for C++.
  #
  # Aliases: +cplusplus+, c++
  class CPlusPlus < Scanner

    register_for :cpp
    file_extension 'cpp'
    title 'C++'

    #-- http://www.cppreference.com/wiki/keywords/start
    KEYWORDS = [
      'and', 'and_eq', 'asm', 'bitand', 'bitor', 'break',
      'case', 'catch', 'class', 'compl', 'const_cast',
      'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else',
      'enum', 'export', 'for', 'goto', 'if', 'namespace', 'new',
      'not', 'not_eq', 'or', 'or_eq', 'reinterpret_cast', 'return',
      'sizeof', 'static_cast', 'struct', 'switch', 'template',
      'throw', 'try', 'typedef', 'typeid', 'typename', 'union',
      'while', 'xor', 'xor_eq',
    ]  # :nodoc:

    PREDEFINED_TYPES = [
      'bool', 'char', 'double', 'float', 'int', 'long',
      'short', 'signed', 'unsigned', 'wchar_t', 'string',
    ]  # :nodoc:

    PREDEFINED_CONSTANTS = [
      'false', 'true',
      'EOF', 'NULL',
    ]  # :nodoc:

    PREDEFINED_VARIABLES = [
      'this',
    ]  # :nodoc:

    DIRECTIVES = [
      'auto', 'const', 'explicit', 'extern', 'friend', 'inline', 'mutable', 'operator',
      'private', 'protected', 'public', 'register', 'static', 'using', 'virtual', 'void',
      'volatile',
    ]  # :nodoc:

    # Maps a word to its token kind; unknown words are plain identifiers.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(PREDEFINED_TYPES, :predefined_type).
      add(PREDEFINED_VARIABLES, :local_variable).
      add(DIRECTIVES, :directive).
      add(PREDEFINED_CONSTANTS, :predefined_constant)  # :nodoc:

    # What may follow a backslash inside string and character literals.
    ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:

  protected

    # Tokenizes the remaining input and feeds the tokens to +encoder+.
    #
    # States: :initial (ordinary code), :string (inside a "..." literal),
    # :include_expected (right after "#include") and :class_name_expected
    # (right after the "class" keyword). +options+ is not used.
    # Returns +encoder+.
    def scan_tokens encoder, options

      state = :initial
      # True where an "identifier:" may be a label (start of input, after ; { }).
      label_expected = true
      # True after "case"/"default" until the next operator token.
      case_expected = false
      # label_expected as it was before the current preprocessor line started.
      label_expected_before_preproc_line = nil
      in_preproc_line = false

      until eos?

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            # An unescaped newline ends a preprocessor line.
            if in_preproc_line && match != "\\\n" && match.index(?\n)
              in_preproc_line = false
              label_expected = label_expected_before_preproc_line
            end
            encoder.text_token match, :space

          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            encoder.text_token match, :comment

          elsif match = scan(/ \# \s* if \s* 0 /x)
            # Everything up to the matching #elif/#else/#endif line (or end of
            # input) is shown as a comment.
            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
            encoder.text_token match, :comment

          elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
            label_expected = match =~ /[;\{\}]/
            if case_expected
              label_expected = true if match == ':'
              case_expected = false
            end
            encoder.text_token match, :operator

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match]
            if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
              kind = :label
              match << matched
            else
              label_expected = false
              if kind == :keyword
                case match
                when 'class'
                  state = :class_name_expected
                when 'case', 'default'
                  case_expected = true
                end
              end
            end
            encoder.text_token match, kind

          elsif match = scan(/\$/)
            encoder.text_token match, :ident

          elsif match = scan(/L?"/)
            encoder.begin_group :string
            if match[0] == ?L
              # Wide string literal: emit the L prefix as its own token so that
              # only the quote is reported as the delimiter.
              encoder.text_token 'L', :modifier
              match = '"'
            end
            state = :string
            encoder.text_token match, :delimiter

          elsif match = scan(/#[ \t]*(\w*)/)
            encoder.text_token match, :preprocessor
            in_preproc_line = true
            label_expected_before_preproc_line = label_expected
            state = :include_expected if self[1] == 'include'

          elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
            label_expected = false
            encoder.text_token match, :char

          elsif match = scan(/0[xX][0-9A-Fa-f]+/)
            label_expected = false
            encoder.text_token match, :hex

          elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/)
            label_expected = false
            encoder.text_token match, :octal

          elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/)
            label_expected = false
            encoder.text_token match, :integer

          elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
            # NOTE(review): the first alternative matches a single digit, so a
            # literal such as 1.5 appears to be emitted as two :float tokens
            # ("1" and ".5") - confirm whether \d+[fF] was intended.
            label_expected = false
            encoder.text_token match, :float

          else
            encoder.text_token getch, :error

          end

        when :string
          if match = scan(/[^\\"]+/)
            encoder.text_token match, :content
          elsif match = scan(/"/)
            encoder.text_token match, :delimiter
            encoder.end_group :string
            state = :initial
            label_expected = false
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char
          elsif match = scan(/ \\ | $ /x)
            # A stray backslash or the end of the line terminates the string
            # with an error token.
            encoder.end_group :string
            encoder.text_token match, :error
            state = :initial
            label_expected = false
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
          end

        when :include_expected
          if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
            encoder.text_token match, :include
            state = :initial

          elsif match = scan(/\s+/)
            encoder.text_token match, :space
            state = :initial if match.index ?\n

          else
            state = :initial

          end

        when :class_name_expected
          if match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            encoder.text_token match, :class
            state = :initial

          elsif match = scan(/\s+/)
            encoder.text_token match, :space

          else
            encoder.text_token getch, :error
            state = :initial

          end

        else
          raise_inspect 'Unknown state', encoder

        end

      end

      # Close a string literal that was still open at end of input.
      if state == :string
        encoder.end_group :string
      end

      encoder
    end

  end

end
end

View File

@ -0,0 +1,192 @@
module CodeRay
module Scanners
# Scanner for CSS stylesheets.
class CSS < Scanner
register_for :css
# Token kinds flagged as "not lines of code" for this scanner.
# NOTE(review): the consumer of this list is not visible here; presumably the
# lines-of-code counter - confirm.
KINDS_NOT_LOC = [
:comment,
:class, :pseudo_class, :type,
:constant, :directive,
:key, :value, :operator, :color, :float, :string,
:error, :important,
] # :nodoc:
# Regular expression building blocks for the CSS lexical grammar.
module RE # :nodoc:
Hex = /[0-9a-fA-F]/
Unicode = /\\#{Hex}{1,6}(?:\r\n|\s)?/ # differs from standard because it allows uppercase hex too
Escape = /#{Unicode}|\\[^\r\n\f0-9a-fA-F]/
NMChar = /[-_a-zA-Z0-9]|#{Escape}/
NMStart = /[_a-zA-Z]|#{Escape}/
NL = /\r\n|\r|\n|\f/
# The closing quote is optional, so unterminated strings match too.
String1 = /"(?:[^\n\r\f\\"]|\\#{NL}|#{Escape})*"?/ # TODO: buggy regexp
String2 = /'(?:[^\n\r\f\\']|\\#{NL}|#{Escape})*'?/ # TODO: buggy regexp
String = /#{String1}|#{String2}/
HexColor = /#(?:#{Hex}{6}|#{Hex}{3})/
Color = /#{HexColor}/
Num = /-?(?:[0-9]+|[0-9]*\.[0-9]+)/
Name = /#{NMChar}+/
Ident = /-?#{NMStart}#{NMChar}*/
AtKeyword = /@#{Ident}/
Percentage = /#{Num}%/
reldimensions = %w[em ex px]
absdimensions = %w[in cm mm pt pc]
Unit = Regexp.union(*(reldimensions + absdimensions))
Dimension = /#{Num}#{Unit}/
# NOTE(review): Comment is not referenced by scan_tokens below, which uses its
# own inline comment pattern.
Comment = %r! /\* (?: .*? \*/ | .* ) !mx
# Function-like values whose argument is treated as string content; the
# closing parenthesis is optional.
Function = /(?:url|alpha|attr|counters?)\((?:[^)\n\r\f]|\\\))*\)?/
Id = /##{Name}/
Class = /\.#{Name}/
PseudoClass = /:#{Name}/
AttributeSelector = /\[[^\]]*\]?/
end
protected
# Tokenizes the remaining input and feeds the tokens to +encoder+.
#
# +states+ is a stack whose top selects the state-specific rules (:initial,
# :media, :block, :media_before_name, :media_after_name). +value_expected+ is
# set by ':' and cleared by ';', '{' and '}'; inside a block it decides whether
# an identifier is a :value or a :key. +options+ is not used.
# Returns +encoder+.
def scan_tokens encoder, options
value_expected = nil
states = [:initial]
until eos?
if match = scan(/\s+/)
encoder.text_token match, :space
# State-specific rules. Every branch that emits a token leaves the iteration
# with +next+; when nothing matched, the case expression evaluates to nil and
# the state-independent rules below are tried instead.
elsif case states.last
when :initial, :media
if match = scan(/(?>#{RE::Ident})(?!\()|\*/ox)
encoder.text_token match, :type
next
elsif match = scan(RE::Class)
encoder.text_token match, :class
next
elsif match = scan(RE::Id)
encoder.text_token match, :constant
next
elsif match = scan(RE::PseudoClass)
encoder.text_token match, :pseudo_class
next
elsif match = scan(RE::AttributeSelector)
# TODO: Improve highlighting inside of attribute selectors.
# Split into "[", the selector text, and "]" (the latter only if present).
encoder.text_token match[0,1], :operator
encoder.text_token match[1..-2], :attribute_name if match.size > 2
encoder.text_token match[-1,1], :operator if match[-1] == ?]
next
elsif match = scan(/@media/)
encoder.text_token match, :directive
states.push :media_before_name
next
end
when :block
if match = scan(/(?>#{RE::Ident})(?!\()/ox)
if value_expected
encoder.text_token match, :value
else
encoder.text_token match, :key
end
next
end
when :media_before_name
if match = scan(RE::Ident)
encoder.text_token match, :type
states[-1] = :media_after_name
next
end
when :media_after_name
if match = scan(/\{/)
encoder.text_token match, :operator
states[-1] = :media
next
end
else
#:nocov:
raise_inspect 'Unknown state', encoder
#:nocov:
end
# State-independent rules.
elsif match = scan(/\/\*(?:.*?\*\/|\z)/m)
encoder.text_token match, :comment
elsif match = scan(/\{/)
value_expected = false
encoder.text_token match, :operator
states.push :block
elsif match = scan(/\}/)
value_expected = false
# Only a block or media section can be closed; any other "}" is an error.
if states.last == :block || states.last == :media
encoder.text_token match, :operator
states.pop
else
encoder.text_token match, :error
end
elsif match = scan(/#{RE::String}/o)
# Emitted as opening quote, content, and the last character as closing
# delimiter.
# NOTE(review): for an unterminated string of two or more characters the last
# content character is emitted as :delimiter - see the "buggy regexp" TODOs.
encoder.begin_group :string
encoder.text_token match[0, 1], :delimiter
encoder.text_token match[1..-2], :content if match.size > 2
encoder.text_token match[-1, 1], :delimiter if match.size >= 2
encoder.end_group :string
elsif match = scan(/#{RE::Function}/o)
# "name(" and ")" are delimiters; the argument is the content.
encoder.begin_group :string
start = match[/^\w+\(/]
encoder.text_token start, :delimiter
if match[-1] == ?)
encoder.text_token match[start.size..-2], :content
encoder.text_token ')', :delimiter
else
encoder.text_token match[start.size..-1], :content
end
encoder.end_group :string
elsif match = scan(/(?: #{RE::Dimension} | #{RE::Percentage} | #{RE::Num} )/ox)
encoder.text_token match, :float
elsif match = scan(/#{RE::Color}/o)
encoder.text_token match, :color
elsif match = scan(/! *important/)
encoder.text_token match, :important
elsif match = scan(/(?:rgb|hsl)a?\([^()\n]*\)?/)
encoder.text_token match, :color
elsif match = scan(RE::AtKeyword)
encoder.text_token match, :directive
elsif match = scan(/ [+>:;,.=()\/] /x)
if match == ':'
value_expected = true
elsif match == ';'
value_expected = false
end
encoder.text_token match, :operator
else
encoder.text_token getch, :error
end
end
encoder
end
end
end
end

View File

@ -0,0 +1,65 @@
module CodeRay
module Scanners

  # = Debug Scanner
  #
  # Reads back the textual dump produced by the Encoders::Debug encoder and
  # replays it as tokens.
  class Debug < Scanner

    register_for :debug
    title 'CodeRay Token Dump Import'

  protected

    # Parses the dump and feeds the reconstructed tokens to +encoder+.
    #
    # Recognized forms: <tt>kind(text)</tt> for a text token, <tt>kind<</tt>
    # and <tt>kind[</tt> to open a group or a line, and <tt>></tt> and
    # <tt>]</tt> to close the most recently opened one. Whatever is still
    # open at end of input is closed as a group. +options+ is not used.
    # Returns +encoder+.
    def scan_tokens encoder, options

      open_kinds = []

      until eos?

        if scan(/\s+/)
          encoder.text_token matched, :space

        elsif scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x)
          token_kind = self[1].to_sym
          if TokenKinds.has_key? token_kind
            # Known kind: emit the text with its backslash escapes removed.
            encoder.text_token self[2].gsub(/\\(.)/m, '\1'), token_kind
          else
            # Unknown kind: emit the raw source text as an error.
            encoder.text_token matched, :error
          end

        elsif scan(/ (\w+) ([<\[]) /x)
          token_kind = self[1].to_sym
          open_kinds.push token_kind
          bracket = self[2]
          if bracket == '<'
            encoder.begin_group token_kind
          elsif bracket == '['
            encoder.begin_line token_kind
          else
            raise 'CodeRay bug: This case should not be reached.'
          end

        elsif !open_kinds.empty? && scan(/ > /x)
          encoder.end_group open_kinds.pop

        elsif !open_kinds.empty? && scan(/ \] /x)
          encoder.end_line open_kinds.pop

        else
          encoder.text_token getch, :space

        end

      end

      # Close everything that is still open, innermost first.
      open_kinds.reverse_each { |open_kind| encoder.end_group open_kind }

      encoder
    end

  end

end
end

View File

@ -0,0 +1,144 @@
module CodeRay
module Scanners

  # Scanner for the Delphi language (Object Pascal).
  #
  # Alias: +pascal+
  class Delphi < Scanner

    register_for :delphi
    file_extension 'pas'

    KEYWORDS = [
      'and', 'array', 'as', 'at', 'asm', 'begin', 'case', 'class',
      'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
      'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
      'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
      'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
      'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
      'procedure', 'program', 'property', 'raise', 'record', 'repeat',
      'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
      'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
      'xor', 'on',
    ]  # :nodoc:

    DIRECTIVES = [
      'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
      'contains', 'deprecated', 'dispid', 'dynamic', 'export',
      'external', 'far', 'forward', 'implements', 'local',
      'near', 'nodefault', 'on', 'overload', 'override',
      'package', 'pascal', 'platform', 'private', 'protected', 'public',
      'published', 'read', 'readonly', 'register', 'reintroduce',
      'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
      'virtual', 'write', 'writeonly',
    ]  # :nodoc:

    # Case-insensitive word -> token kind map. Directives are added after the
    # keywords, so a word present in both lists ends up as a directive.
    IDENT_KIND = WordList::CaseIgnoring.new(:ident).
      add(KEYWORDS, :keyword).
      add(DIRECTIVES, :directive)  # :nodoc:

    # Tokens after which the next word is a plain name, whatever IDENT_KIND
    # says about it.
    NAME_FOLLOWS = WordList::CaseIgnoring.new(false).
      add(%w(procedure function .))  # :nodoc:

  protected

    # Tokenizes the remaining input and feeds the tokens to +encoder+.
    #
    # States: :initial (ordinary code) and :string (inside a quoted string).
    # +last_token+ holds the text of the previous token of the kinds that do
    # not skip the assignment at the bottom of the loop with +next+.
    # +options+ is not used. Returns +encoder+.
    def scan_tokens encoder, options

      state = :initial
      last_token = ''

      until eos?

        if state == :initial

          if match = scan(/ \s+ /x)
            encoder.text_token match, :space
            next

          elsif match = scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
            encoder.text_token match, :preprocessor
            next

          elsif match = scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
            encoder.text_token match, :comment
            next

          elsif match = scan(/ <[>=]? | >=? | :=? | [-+=*\/;,@\^|\(\)\[\]] | \.\. /x)
            encoder.text_token match, :operator

          elsif match = scan(/\./)
            encoder.text_token match, :operator
            # Keep 'end' as last token after the dot of "end."
            next if last_token == 'end'

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            encoder.text_token match, NAME_FOLLOWS[last_token] ? :ident : IDENT_KIND[match]

          elsif match = skip(/ ' ( [^\n']|'' ) ( ' | $ ) /x)
            # Single-character literal. The second group is empty when the
            # literal ran into the end of the line without a closing quote;
            # in that case no closing delimiter is emitted, so the tokens
            # contain exactly the text that was read.
            encoder.begin_group :char
            encoder.text_token "'", :delimiter
            encoder.text_token self[1], :content
            encoder.text_token "'", :delimiter unless self[2].empty?
            encoder.end_group :char
            next

          elsif match = scan(/ ' /x)
            encoder.begin_group :string
            encoder.text_token match, :delimiter
            state = :string

          elsif match = scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
            encoder.text_token match, :char

          elsif match = scan(/ \$ [0-9A-Fa-f]+ /x)
            encoder.text_token match, :hex

          elsif match = scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
            encoder.text_token match, :integer

          elsif match = scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
            encoder.text_token match, :float

          else
            encoder.text_token getch, :error
            next

          end

        elsif state == :string
          if match = scan(/[^\n']+/)
            encoder.text_token match, :content
          elsif match = scan(/''/)
            # A doubled quote is an escaped quote inside the string.
            encoder.text_token match, :char
          elsif match = scan(/'/)
            encoder.text_token match, :delimiter
            encoder.end_group :string
            state = :initial
            next
          elsif match = scan(/\n/)
            # Strings do not span lines; a newline closes the group.
            encoder.end_group :string
            encoder.text_token match, :space
            state = :initial
          else
            # raise_inspect (not Kernel#raise): raise does not accept a String
            # followed by the encoder as arguments.
            raise_inspect "else case \' reached; %p not handled." % peek(1), encoder
          end

        else
          raise_inspect 'else-case reached', encoder

        end

        last_token = match

      end

      # Close a string literal that was still open at end of input.
      if state == :string
        encoder.end_group state
      end

      encoder
    end

  end

end
end

View File

@ -0,0 +1,201 @@
module CodeRay
module Scanners

  # Scanner for output of the diff command.
  #
  # Alias: +patch+
  class Diff < Scanner

    register_for :diff
    title 'diff output'

    # :highlight_code - tokenize changed lines with a scanner chosen from the
    #                   file name found in the diff header.
    # :inline_diff    - mark the changed part inside a single replaced line.
    DEFAULT_OPTIONS = {
      :highlight_code => true,
      :inline_diff => true,
    }

  protected

    require 'coderay/helpers/file_type'

    # Tokenizes the remaining input and feeds the tokens to +encoder+.
    #
    # Works line by line: a newline closes the current line group, and the
    # first characters of each line decide its kind (header, change marker,
    # insertion, deletion, context, comment). Reads +options+[:highlight_code]
    # and +options+[:inline_diff]. Returns +encoder+.
    def scan_tokens encoder, options

      line_kind = nil
      state = :initial
      # Number of consecutive deleted lines seen so far.
      deleted_lines = 0
      # One content scanner per language, created on first use.
      scanners = Hash.new do |h, lang|
        h[lang] = Scanners[lang].new '', :keep_tokens => true, :keep_state => true
      end
      content_scanner = scanners[:plain]
      # State of the content scanner before a run of deleted lines, so that
      # the insertions that replace them can start from the same state.
      content_scanner_entry_state = nil

      until eos?

        if match = scan(/\n/)
          deleted_lines = 0 unless line_kind == :delete
          if line_kind
            encoder.end_line line_kind
            line_kind = nil
          end
          encoder.text_token match, :space
          next
        end

        case state

        when :initial
          if match = scan(/--- |\+\+\+ |=+|_+/)
            encoder.begin_line line_kind = :head
            encoder.text_token match, :head
            if match = scan(/.*?(?=$|[\t\n\x00]|  \(revision)/)
              encoder.text_token match, :filename
              if options[:highlight_code]
                # Pick the content scanner from the file name; never recurse
                # into the diff scanner itself.
                file_type = FileType.fetch(match, :text)
                file_type = :text if file_type == :diff
                content_scanner = scanners[file_type]
                content_scanner_entry_state = nil
              end
            end
            next unless match = scan(/.+/)
            encoder.text_token match, :plain
          elsif match = scan(/Index: |Property changes on: /)
            encoder.begin_line line_kind = :head
            encoder.text_token match, :head
            next unless match = scan(/.+/)
            encoder.text_token match, :plain
          elsif match = scan(/Added: /)
            encoder.begin_line line_kind = :head
            encoder.text_token match, :head
            next unless match = scan(/.+/)
            encoder.text_token match, :plain
            state = :added
          elsif match = scan(/\\ .*/)
            encoder.text_token match, :comment
          elsif match = scan(/@@(?>[^@\n]*)@@/)
            content_scanner.state = :initial unless match?(/\n\+/)
            content_scanner_entry_state = nil
            # A marker alone on its line becomes a :change line; otherwise it
            # is a group followed by code on the same line.
            if check(/\n|$/)
              encoder.begin_line line_kind = :change
            else
              encoder.begin_group :change
            end
            encoder.text_token match[0,2], :change
            encoder.text_token match[2...-2], :plain
            encoder.text_token match[-2,2], :change
            encoder.end_group :change unless line_kind
            next unless match = scan(/.+/)
            if options[:highlight_code]
              content_scanner.tokenize match, :tokens => encoder
            else
              encoder.text_token match, :plain
            end
            next
          elsif match = scan(/\+/)
            encoder.begin_line line_kind = :insert
            encoder.text_token match, :insert
            next unless match = scan(/.+/)
            if options[:highlight_code]
              content_scanner.tokenize match, :tokens => encoder
            else
              encoder.text_token match, :plain
            end
            next
          elsif match = scan(/-/)
            deleted_lines += 1
            encoder.begin_line line_kind = :delete
            encoder.text_token match, :delete
            # Inline diff: exactly one deleted line directly followed by
            # exactly one inserted line.
            if options[:inline_diff] && deleted_lines == 1 && check(/(?>.*)\n\+(?>.*)$(?!\n\+)/)
              content_scanner_entry_state = content_scanner.state
              skip(/(.*)\n\+(.*)$/)
              head, deletion, insertion, tail = diff self[1], self[2]
              pre, deleted, post = content_scanner.tokenize [head, deletion, tail], :tokens => Tokens.new
              encoder.tokens pre
              unless deleted.empty?
                encoder.begin_group :eyecatcher
                encoder.tokens deleted
                encoder.end_group :eyecatcher
              end
              encoder.tokens post
              encoder.end_line line_kind
              encoder.text_token "\n", :space
              encoder.begin_line line_kind = :insert
              encoder.text_token '+', :insert
              # Rewind the content scanner so the inserted line is tokenized
              # from the same state as the deleted one.
              content_scanner.state = content_scanner_entry_state || :initial
              pre, inserted, post = content_scanner.tokenize [head, insertion, tail], :tokens => Tokens.new
              encoder.tokens pre
              unless inserted.empty?
                encoder.begin_group :eyecatcher
                encoder.tokens inserted
                encoder.end_group :eyecatcher
              end
              encoder.tokens post
            elsif match = scan(/.*/)
              if options[:highlight_code]
                if deleted_lines == 1
                  content_scanner_entry_state = content_scanner.state
                end
                content_scanner.tokenize match, :tokens => encoder unless match.empty?
                if !match?(/\n-/)
                  if match?(/\n\+/)
                    content_scanner.state = content_scanner_entry_state || :initial
                  end
                  content_scanner_entry_state = nil
                end
              else
                encoder.text_token match, :plain
              end
            end
            next
          elsif match = scan(/ .*/)
            if options[:highlight_code]
              content_scanner.tokenize match, :tokens => encoder
            else
              encoder.text_token match, :plain
            end
            next
          elsif match = scan(/.+/)
            encoder.begin_line line_kind = :comment
            encoder.text_token match, :plain
          else
            # raise_inspect takes the encoder as its second argument, as in the
            # other scanners.
            raise_inspect 'else case reached', encoder
          end

        when :added
          if match = scan(/   \+/)
            encoder.begin_line line_kind = :insert
            encoder.text_token match, :insert
            next unless match = scan(/.+/)
            encoder.text_token match, :plain
          else
            state = :initial
            next
          end
        end

      end

      encoder.end_line line_kind if line_kind

      encoder
    end

  private

    # Splits two lines into their common head, the differing middle parts and
    # their common tail.
    #
    # Returns [head, middle of +a+, middle of +b+, tail].
    def diff a, b
      # i will be the index of the leftmost difference from the left.
      i_max = [a.size, b.size].min
      i = 0
      i += 1 while i < i_max && a[i] == b[i]
      # j_min will be the index of the leftmost difference from the right.
      j_min = i - i_max
      # j will be the index of the rightmost difference from the right which
      # does not precede the leftmost one from the left.
      j = -1
      j -= 1 while j >= j_min && a[j] == b[j]
      return a[0...i], a[i..j], b[i..j], (j < -1) ? a[j+1..-1] : ''
    end

  end

end
end

View File

@ -0,0 +1,81 @@
module CodeRay
module Scanners

  load :html
  load :ruby

  # Scanner for HTML ERB templates.
  #
  # Text outside of ERB tags is handed to the HTML scanner; the code inside
  # the tags is handed to the Ruby scanner.
  class ERB < Scanner

    register_for :erb
    title 'HTML ERB Template'

    KINDS_NOT_LOC = HTML::KINDS_NOT_LOC

    # One ERB tag. Captures: (1) the opening delimiter, (2) the code,
    # (3) the closing delimiter, which is empty if the tag is unterminated.
    ERB_RUBY_BLOCK = /
      (<%(?!%)[-=\#]?)
      ((?>
        [^\-%]*    # normal*
        (?>        # special
          (?: %(?!>) | -(?!%>) )
          [^\-%]*  # normal*
        )*
      ))
      ((?: -?%> )?)
    /x  # :nodoc:

    # "<%" that is not the start of the "<%%" escape.
    START_OF_ERB = /
      <%(?!%)
    /x  # :nodoc:

  protected

    # Creates the two sub-scanners. The HTML scanner keeps its state between
    # fragments.
    def setup
      @ruby_scanner = CodeRay.scanner(:ruby, :tokens => @tokens, :keep_tokens => true)
      @html_scanner = CodeRay.scanner(:html, :tokens => @tokens, :keep_tokens => true, :keep_state => true)
    end

    # Resets the HTML sub-scanner together with this scanner.
    def reset_instance
      super
      @html_scanner.reset
    end

    # Alternates between HTML fragments and ERB tags until the input is
    # exhausted. +options+ is not used. Returns +encoder+.
    def scan_tokens encoder, options

      until eos?

        # Everything up to the next ERB tag (or the rest of the input).
        html = scan_until(/(?=#{START_OF_ERB})/o) || scan_rest

        if html && !html.empty?
          @html_scanner.tokenize html, :tokens => encoder

        elsif scan(/#{ERB_RUBY_BLOCK}/o)
          opener, ruby_code, closer = self[1], self[2], self[3]

          encoder.begin_group :inline
          encoder.text_token opener, :inline_delimiter
          unless ruby_code.empty?
            if opener == '<%#'
              encoder.text_token ruby_code, :comment
            else
              @ruby_scanner.tokenize ruby_code, :tokens => encoder
            end
          end
          encoder.text_token closer, :inline_delimiter unless closer.empty?
          encoder.end_group :inline

        else
          raise_inspect 'else-case reached!', encoder

        end

      end

      encoder

    end

  end

end
end

View File

@ -0,0 +1,255 @@
module CodeRay
module Scanners
load :java
# Scanner for Groovy.
class Groovy < Java
register_for :groovy
# TODO: check list of keywords
GROOVY_KEYWORDS = %w[
as assert def in
] # :nodoc:
# Keywords after which a value is expected; scan_tokens only treats "/" as
# the start of a regexp literal where a value is expected.
KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
case instanceof new return throw typeof while as assert in
] # :nodoc:
GROOVY_MAGIC_VARIABLES = %w[ it ] # :nodoc:
# Java's word list extended with the Groovy-specific words.
IDENT_KIND = Java::IDENT_KIND.dup.
add(GROOVY_KEYWORDS, :keyword).
add(GROOVY_MAGIC_VARIABLES, :local_variable) # :nodoc:
ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc: no 4-byte unicode chars? U[a-fA-F0-9]{8}
REGEXP_ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x # :nodoc:
# TODO: interpretation inside ', ", /
# Pattern for a run of plain content, keyed by the delimiter that opened the
# literal.
STRING_CONTENT_PATTERN = {
"'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
'"' => /[^\\$"\n]+/,
"'''" => /(?>[^\\']+|'(?!''))+/,
'"""' => /(?>[^\\$"]+|"(?!""))+/,
'/' => /[^\\$\/\n]+/,
} # :nodoc:
protected
# Tokenizes the remaining input and feeds the tokens to +encoder+.
#
# States: :initial (code), :string, :multiline_string and :regexp. A "${"
# inside a literal switches back to :initial for the embedded code; the state
# to return to is kept on inline_block_stack. +options+ is not used.
# Returns +encoder+.
def scan_tokens encoder, options
state = :initial
# Saved [state, string_delimiter, inline_block_paren_depth] triples of the
# literals that are suspended by a "${...}" block.
inline_block_stack = []
# Brace nesting depth inside the current "${...}" block.
inline_block_paren_depth = nil
string_delimiter = nil
import_clause = class_name_follows = last_token = after_def = false
value_expected = true
until eos?
case state
when :initial
if match = scan(/ \s+ | \\\n /x)
encoder.text_token match, :space
if match.index ?\n
import_clause = after_def = false
value_expected = true unless value_expected
end
next
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
value_expected = true
after_def = false
encoder.text_token match, :comment
elsif bol? && match = scan(/ \#!.* /x)
encoder.text_token match, :doctype
elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
after_def = value_expected = false
encoder.text_token match, :include
elsif match = scan(/ #{IDENT} | \[\] /ox)
kind = IDENT_KIND[match]
value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
# The context can override the kind from the word list: member access,
# class name, method name after "def", or a "key:" label.
if last_token == '.'
kind = :ident
elsif class_name_follows
kind = :class
class_name_follows = false
elsif after_def && check(/\s*[({]/)
kind = :method
after_def = false
elsif kind == :ident && last_token != '?' && check(/:/)
kind = :key
else
class_name_follows = true if match == 'class' || (import_clause && match == 'as')
import_clause = match == 'import'
after_def = true if match == 'def'
end
encoder.text_token match, kind
elsif match = scan(/;/)
import_clause = after_def = false
value_expected = true
encoder.text_token match, :operator
elsif match = scan(/\{/)
class_name_follows = after_def = false
value_expected = true
encoder.text_token match, :operator
# Track nested braces while inside a "${...}" block.
if !inline_block_stack.empty?
inline_block_paren_depth += 1
end
# TODO: ~'...', ~"..." and ~/.../ style regexps
elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
&& | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
value_expected = true
value_expected = :regexp if match == '~'
after_def = false
encoder.text_token match, :operator
elsif match = scan(/ [)\]}] /x)
value_expected = after_def = false
if !inline_block_stack.empty? && match == '}'
inline_block_paren_depth -= 1
if inline_block_paren_depth == 0 # closing brace of inline block reached
encoder.text_token match, :inline_delimiter
encoder.end_group :inline
# Resume the literal that contained the "${...}" block.
state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
next
end
end
encoder.text_token match, :operator
elsif check(/[\d.]/)
after_def = value_expected = false
if match = scan(/0[xX][0-9A-Fa-f]+/)
encoder.text_token match, :hex
elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
encoder.text_token match, :octal
elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
encoder.text_token match, :float
elsif match = scan(/\d+[lLgG]?/)
encoder.text_token match, :integer
end
elsif match = scan(/'''|"""/)
after_def = value_expected = false
state = :multiline_string
encoder.begin_group :string
string_delimiter = match
encoder.text_token match, :delimiter
# TODO: record.'name' syntax
elsif match = scan(/["']/)
after_def = value_expected = false
# NOTE(review): match is always a quote character here, so this always
# selects :string; "/" regexps are opened by the branch below.
state = match == '/' ? :regexp : :string
encoder.begin_group state
string_delimiter = match
encoder.text_token match, :delimiter
elsif value_expected && match = scan(/\//)
after_def = value_expected = false
encoder.begin_group :regexp
state = :regexp
string_delimiter = '/'
encoder.text_token match, :delimiter
elsif match = scan(/ @ #{IDENT} /ox)
after_def = value_expected = false
encoder.text_token match, :annotation
elsif match = scan(/\//)
# A slash where no value is expected is the division operator.
after_def = false
value_expected = true
encoder.text_token match, :operator
else
encoder.text_token getch, :error
end
when :string, :regexp, :multiline_string
if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
encoder.text_token match, :content
elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
encoder.text_token match, :delimiter
if state == :regexp
# TODO: regexp modifiers? s, m, x, i?
modifiers = scan(/[ix]+/)
encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
end
# Multiline strings were opened as a :string group, so close them as one.
state = :string if state == :multiline_string
encoder.end_group state
string_delimiter = nil
after_def = value_expected = false
state = :initial
next
elsif (state == :string || state == :multiline_string) &&
(match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
# In single-quoted literals only \\ and \' are shown as escapes.
if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
encoder.text_token match, :content
else
encoder.text_token match, :char
end
elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
encoder.text_token match, :char
elsif match = scan(/ \$ #{IDENT} /mox)
# "$name" interpolation: emitted as its own inline group.
encoder.begin_group :inline
encoder.text_token '$', :inline_delimiter
match = match[1..-1]
encoder.text_token match, IDENT_KIND[match]
encoder.end_group :inline
next
elsif match = scan(/ \$ \{ /x)
# "${" starts embedded code: remember the literal and scan code until the
# matching "}".
encoder.begin_group :inline
encoder.text_token match, :inline_delimiter
inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
inline_block_paren_depth = 1
state = :initial
next
elsif match = scan(/ \$ /mx)
encoder.text_token match, :content
elsif match = scan(/ \\. /mx)
encoder.text_token match, :content # TODO: Shouldn't this be :error?
elsif match = scan(/ \\ | \n /x)
# A stray backslash or a newline ends the literal with an error token.
encoder.end_group state
encoder.text_token match, :error
after_def = value_expected = false
state = :initial
else
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
end
else
raise_inspect 'Unknown state', encoder
end
# NOTE(review): kind is only assigned in the identifier branch above, so this
# test uses the kind of the most recent identifier, not necessarily of the
# token just emitted - confirm that this is intended.
last_token = match unless [:space, :comment, :doctype].include? kind
end
# Close a literal that was still open at end of input.
if [:multiline_string, :string, :regexp].include? state
encoder.end_group state
end
encoder
end
end
end
end

View File

@ -0,0 +1,168 @@
module CodeRay
module Scanners
load :ruby
load :html
load :java_script
# Scanner for HAML templates. Delegates to the Ruby, HTML and JavaScript
# scanners for the embedded parts.
class HAML < Scanner
register_for :haml
title 'HAML Template'
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
protected
# Creates the sub-scanners. The embedded Ruby scanner is started in the Ruby
# scanner's interpreted_string_state.
def setup
super
@ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
@embedded_ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true, :state => @ruby_scanner.interpreted_string_state
@html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true
end
# Tokenizes the remaining input and feeds the tokens to +encoder+.
#
# Each pass of the loop handles one line: at the beginning of a line the HAML
# constructs are tried one after another (several can apply to the same line);
# anywhere else the rest of the line is handed to the HTML scanner.
# +options+ is not used. Returns +encoder+.
def scan_tokens encoder, options
match = nil
code = ''
until eos?
if bol?
if match = scan(/!!!.*/)
encoder.text_token match, :doctype
next
end
# Comment or filter header alone on its line ("/", "-#", ":javascript",
# ":ruby", ":name"). Capture 1 is the indentation, capture 2 the marker.
if match = scan(/(?>( *)(\/(?!\[if)|-\#|:javascript|:ruby|:\w+) *)(?=\n)/)
encoder.text_token match, :comment
code = self[2]
# The body: the following lines that are indented deeper than the header.
if match = scan(/(?:\n+#{self[1]} .*)+/)
case code
when '/', '-#'
encoder.text_token match, :comment
when ':javascript'
# TODO: recognize #{...} snippets inside JavaScript
@java_script_scanner ||= CodeRay.scanner :java_script, :tokens => @tokens, :keep_tokens => true
@java_script_scanner.tokenize match, :tokens => encoder
when ':ruby'
@ruby_scanner.tokenize match, :tokens => encoder
when /:\w+/
encoder.text_token match, :comment
else
raise 'else-case reached: %p' % [code]
end
end
end
if match = scan(/ +/)
encoder.text_token match, :space
end
if match = scan(/\/.*/)
encoder.text_token match, :comment
next
end
# A leading backslash: the rest of the line goes to the HTML scanner.
if match = scan(/\\/)
encoder.text_token match, :plain
if match = scan(/.+/)
@html_scanner.tokenize match, :tokens => encoder
end
next
end
# tag becomes true once a %tag, .class or #id has been seen on this line; the
# attribute forms below are only tried in that case.
tag = false
if match = scan(/%[\w:]+\/?/)
encoder.text_token match, :tag
# if match = scan(/( +)(.+)/)
# encoder.text_token self[1], :space
# @embedded_ruby_scanner.tokenize self[2], :tokens => encoder
# end
tag = true
end
while match = scan(/([.#])[-\w]*\w/)
encoder.text_token match, self[1] == '#' ? :constant : :class
tag = true
end
# (...) attributes: the content is tokenized by the HTML scanner in its
# :attribute state.
if tag && match = scan(/(\()([^)]+)?(\))?/)
# TODO: recognize title=@title, class="widget_#{@widget.number}"
encoder.text_token self[1], :plain
@html_scanner.tokenize self[2], :tokens => encoder, :state => :attribute if self[2]
encoder.text_token self[3], :plain if self[3]
end
# {...} attributes: collect the code up to the matching "}" (tracking nested
# braces; a comma may be followed by a line break) and tokenize it as Ruby.
if tag && match = scan(/\{/)
encoder.text_token match, :plain
code = ''
level = 1
while true
code << scan(/([^\{\},\n]|, *\n?)*/)
case match = getch
when '{'
level += 1
code << match
when '}'
level -= 1
if level > 0
code << match
else
break
end
when "\n", ",", nil
break
end
end
@ruby_scanner.tokenize code, :tokens => encoder unless code.empty?
# match is the character that ended the loop (nil at end of input).
encoder.text_token match, :plain if match
end
# [...] after a tag: the content is tokenized as Ruby.
if tag && match = scan(/(\[)([^\]\n]+)?(\])?/)
encoder.text_token self[1], :plain
@ruby_scanner.tokenize self[2], :tokens => encoder if self[2]
encoder.text_token self[3], :plain if self[3]
end
if tag && match = scan(/\//)
encoder.text_token match, :tag
end
# Ruby output/code markers (=, -, &=, !=, "& ", "!", ~, optionally preceded by
# > and <). Capture 2 is set for the "& " and "!" forms, which use the
# embedded Ruby scanner; capture 4 is the code.
if scan(/(>?<?[-=]|[&!]=|(& |!)|~)( *)([^,\n\|]+(?:(, *|\|(?=.|\n.*\|$))\n?[^,\n\|]*)*)?/)
encoder.text_token self[1] + self[3], :plain
if self[4]
if self[2]
@embedded_ruby_scanner.tokenize self[4], :tokens => encoder
else
@ruby_scanner.tokenize self[4], :tokens => encoder
end
end
# Otherwise the rest of the line (after optional < and > markers) goes to the
# HTML scanner.
elsif match = scan(/((?:<|><?)(?![!?\/\w]))?(.+)?/)
encoder.text_token self[1], :plain if self[1]
# TODO: recognize #{...} snippets
@html_scanner.tokenize self[2], :tokens => encoder if self[2]
end
elsif match = scan(/.+/)
@html_scanner.tokenize match, :tokens => encoder
end
if match = scan(/\n/)
encoder.text_token match, :space
end
end
encoder
end
end
end
end

View File

@ -0,0 +1,253 @@
module CodeRay
module Scanners
# HTML Scanner
#
# Alias: +xhtml+
#
# See also: Scanners::XML
class HTML < Scanner
register_for :html
KINDS_NOT_LOC = [
:comment, :doctype, :preprocessor,
:tag, :attribute_name, :operator,
:attribute_value, :string,
:plain, :entity, :error,
] # :nodoc:
EVENT_ATTRIBUTES = %w(
onabort onafterprint onbeforeprint onbeforeunload onblur oncanplay
oncanplaythrough onchange onclick oncontextmenu oncuechange ondblclick
ondrag ondragdrop ondragend ondragenter ondragleave ondragover
ondragstart ondrop ondurationchange onemptied onended onerror onfocus
onformchange onforminput onhashchange oninput oninvalid onkeydown
onkeypress onkeyup onload onloadeddata onloadedmetadata onloadstart
onmessage onmousedown onmousemove onmouseout onmouseover onmouseup
onmousewheel onmove onoffline ononline onpagehide onpageshow onpause
onplay onplaying onpopstate onprogress onratechange onreadystatechange
onredo onreset onresize onscroll onseeked onseeking onselect onshow
onstalled onstorage onsubmit onsuspend ontimeupdate onundo onunload
onvolumechange onwaiting
)
IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).
add(EVENT_ATTRIBUTES, :script)
ATTR_NAME = /[\w.:-]+/ # :nodoc:
TAG_END = /\/?>/ # :nodoc:
HEX = /[0-9a-fA-F]/ # :nodoc:
ENTITY = /
&
(?:
\w+
|
\#
(?:
\d+
|
x#{HEX}+
)
)
;
/ox # :nodoc:
PLAIN_STRING_CONTENT = {
"'" => /[^&'>\n]+/,
'"' => /[^&">\n]+/,
} # :nodoc:
# Resets the scanner: delegates to the superclass implementation first,
# then puts the HTML state machine back into its starting state.
def reset
  super()
  @state = :initial
  # No quoted attribute value is open, so there is no content pattern.
  @plain_string_content = nil
end
protected
# Initializes the state that scan_tokens reads when it is resumed with
# :keep_state: the current state-machine state and the content pattern
# of the currently open quoted attribute value (none at start).
def setup
  @state = :initial
  @plain_string_content = nil
end
# Tokenizes +code+ as JavaScript and sends the tokens to +encoder+.
#
# Does nothing (and returns nil) when +code+ is nil or empty. The
# JavaScript scanner is created on first use and reused afterwards.
# This method does not open an :inline group itself; the commented-out
# begin_group/end_group calls of earlier revisions are gone here.
def scan_java_script encoder, code
  return if !code || code.empty?

  @java_script_scanner ||= Scanners::JavaScript.new('', :keep_tokens => true)
  @java_script_scanner.tokenize(code, :tokens => encoder)
end
# Tokenizes the remaining input as HTML and sends every token to +encoder+.
#
# The method is a state machine. +state+ is one of:
#
#   :initial                 text, comments, doctype, processing
#                            instructions, entities and tag openers
#   :attribute               inside a tag; an attribute name or the tag end
#   :attribute_equal         after an attribute name; an optional "="
#   :attribute_value         after "="; a bare or quoted value
#   :attribute_value_string  inside a quoted attribute value
#   :in_special_tag          inside <script>; the content is handed to
#                            scan_java_script
#
# Options read here:
#   :state       start state (defaults to @state)
#   :keep_state  when true, the final state and string content pattern are
#                stored in @state / @plain_string_content for the next run
#
# Returns +encoder+.
def scan_tokens encoder, options

  state = options[:state] || @state
  plain_string_content = @plain_string_content
  in_tag = in_attribute = nil

  # When resuming inside a quoted value, re-open the :string group that
  # the end of this method closes.
  encoder.begin_group :string if state == :attribute_value_string

  until eos?

    # Whitespace is a :space token everywhere except inside <script>,
    # where it belongs to the embedded code.
    if state != :in_special_tag && match = scan(/\s+/m)
      encoder.text_token match, :space

    else

      case state

      when :initial
        if match = scan(/<!--(?:.*?-->|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
          encoder.text_token match, :doctype
        elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
          encoder.text_token match, :preprocessor
        elsif match = scan(/<\?(?:.*?\?>|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<\/[-\w.:]*>?/m)
          in_tag = nil
          encoder.text_token match, :tag
        elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m)
          encoder.text_token match, :tag
          # in_tag is "script" for a script tag and nil for any other tag.
          in_tag = self[1]
          if self[2]
            # The tag was closed right away ("<script>").
            state = :in_special_tag if in_tag
          else
            state = :attribute
          end
        elsif match = scan(/[^<>&]+/)
          encoder.text_token match, :plain
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/[<>&]/)
          in_tag = nil
          encoder.text_token match, :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
        end

      when :attribute
        if match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          in_attribute = nil
          if in_tag
            state = :in_special_tag
          else
            state = :initial
          end
        elsif match = scan(/#{ATTR_NAME}/o)
          # IN_ATTRIBUTE yields :script for event handler attributes.
          in_attribute = IN_ATTRIBUTE[match]
          encoder.text_token match, :attribute_name
          state = :attribute_equal
        else
          in_tag = nil
          encoder.text_token getch, :error
        end

      when :attribute_equal
        if match = scan(/=/)  #/
          encoder.text_token match, :operator
          state = :attribute_value
        elsif check(/#{ATTR_NAME}/o) || check(/#{TAG_END}/o)
          # The previous attribute had no value (<input disabled checked>).
          # Only look ahead here: the text must stay in the input so that
          # the :attribute state can emit a token for it. Consuming it
          # with scan would silently drop it from the output.
          state = :attribute
          next
        else
          encoder.text_token getch, :error
          state = :attribute
        end

      when :attribute_value
        if match = scan(/#{ATTR_NAME}/o)
          encoder.text_token match, :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          if in_attribute == :script
            # Event handler: the quoted value is JavaScript.
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            if scan(/javascript:[ \t]*/)
              encoder.text_token matched, :comment
            end
            # Everything up to the matching quote (or the end of input).
            code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
            scan_java_script encoder, code
            match = scan(/["']/)
            encoder.text_token match, :inline_delimiter if match
            encoder.end_group :inline
            state = :attribute
            in_attribute = nil
          else
            encoder.begin_group :string
            state = :attribute_value_string
            plain_string_content = PLAIN_STRING_CONTENT[match]
            encoder.text_token match, :delimiter
          end
        elsif match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          state = :initial
        else
          encoder.text_token getch, :error
        end

      when :attribute_value_string
        if match = scan(plain_string_content)
          encoder.text_token match, :content
        elsif match = scan(/['"]/)
          encoder.text_token match, :delimiter
          encoder.end_group :string
          state = :attribute
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/&/)
          encoder.text_token match, :content
        elsif match = scan(/[\n>]/)
          # Unterminated value: close the group and flag the character.
          encoder.end_group :string
          state = :initial
          encoder.text_token match, :error
        end

      when :in_special_tag
        case in_tag
        when 'script'
          encoder.text_token match, :space if match = scan(/[ \t]*\n/)
          if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
            # Script wrapped in an HTML comment: the wrapper is emitted
            # as :comment, the inside is treated as code.
            code = self[2] || self[4]
            closing = self[3]
            encoder.text_token self[1], :comment
          else
            code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
            closing = false
          end
          unless code.empty?
            encoder.begin_group :inline
            scan_java_script encoder, code
            encoder.end_group :inline
          end
          encoder.text_token closing, :comment if closing
          state = :initial
        else
          raise 'unknown special tag: %p' % [in_tag]
        end

      else
        raise_inspect 'Unknown state: %p' % [state], encoder

      end

    end

  end

  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
  end

  # Never leave a :string group open at the end of a run.
  encoder.end_group :string if state == :attribute_value_string

  encoder
end
end
end
end

View File

@ -0,0 +1,174 @@
module CodeRay
module Scanners
# Scanner for Java.
class Java < Scanner
register_for :java
autoload :BuiltinTypes, 'coderay/scanners/java/builtin_types'
# http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
KEYWORDS = %w[
assert break case catch continue default do else
finally for if instanceof import new package
return switch throw try typeof while
debugger export
] # :nodoc:
RESERVED = %w[ const goto ] # :nodoc:
CONSTANTS = %w[ false null true ] # :nodoc:
MAGIC_VARIABLES = %w[ this super ] # :nodoc:
TYPES = %w[
boolean byte char class double enum float int interface long
short void
] << '[]' # :nodoc: because int[] should be highlighted as a type
DIRECTIVES = %w[
abstract extends final implements native private protected public
static strictfp synchronized throws transient volatile
] # :nodoc:
IDENT_KIND = WordList.new(:ident).
add(KEYWORDS, :keyword).
add(RESERVED, :reserved).
add(CONSTANTS, :predefined_constant).
add(MAGIC_VARIABLES, :local_variable).
add(TYPES, :type).
add(BuiltinTypes::List, :predefined_type).
add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
add(DIRECTIVES, :directive) # :nodoc:
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
STRING_CONTENT_PATTERN = {
"'" => /[^\\']+/,
'"' => /[^\\"]+/,
'/' => /[^\\\/]+/,
} # :nodoc:
IDENT = /[a-zA-Z_][A-Za-z_0-9]*/ # :nodoc:
protected
# Tokenizes the remaining input as Java source and sends every token to
# +encoder+. +options+ is not read by this method.
#
# Two states are used: :initial for ordinary code and :string while
# inside a quoted literal (the quote character is kept in
# +string_delimiter+). Three flags refine identifier highlighting:
#
#   package_name_expected  :include / :namespace after "import" /
#                          "package" until the next ";"; a dotted name
#                          is then emitted with that kind
#   class_name_follows     set after "class" / "interface"; the next
#                          identifier becomes :class
#   last_token_dot         the previous token was "."; the identifier
#                          that follows is always a plain :ident
#
# Returns +encoder+.
def scan_tokens encoder, options

  state = :initial
  string_delimiter = nil
  package_name_expected = false
  class_name_follows = false
  last_token_dot = false

  until eos?

    case state

    when :initial

      if match = scan(/ \s+ | \\\n /x)
        encoder.text_token match, :space
        next  # skip the last_token_dot update below

      elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        encoder.text_token match, :comment
        next  # skip the last_token_dot update below

      elsif package_name_expected && match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
        encoder.text_token match, package_name_expected

      elsif match = scan(/ #{IDENT} | \[\] /ox)
        kind = IDENT_KIND[match]
        if last_token_dot
          kind = :ident
        elsif class_name_follows
          kind = :class
          class_name_follows = false
        else
          case match
          when 'import'
            package_name_expected = :include
          when 'package'
            package_name_expected = :namespace
          when 'class', 'interface'
            class_name_follows = true
          end
        end
        encoder.text_token match, kind

      elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
        encoder.text_token match, :operator

      elsif match = scan(/;/)
        package_name_expected = false
        encoder.text_token match, :operator

      elsif match = scan(/\{/)
        class_name_follows = false
        encoder.text_token match, :operator

      elsif check(/[\d.]/)
        # Hex and octal literals take the same optional long suffix
        # (L / l) that the decimal branch accepts, so that "0xFFL" is
        # one token instead of "0xFF" followed by an identifier "L".
        if match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
          encoder.text_token match, :hex
        elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])[lL]?/)
          encoder.text_token match, :octal
        elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
          encoder.text_token match, :float
        elsif match = scan(/\d+[lL]?/)
          encoder.text_token match, :integer
        end

      elsif match = scan(/["']/)
        state = :string
        encoder.begin_group state
        string_delimiter = match
        encoder.text_token match, :delimiter

      elsif match = scan(/ @ #{IDENT} /ox)
        encoder.text_token match, :annotation

      else
        encoder.text_token getch, :error

      end

    when :string
      if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
        encoder.text_token match, :content
      elsif match = scan(/["'\/]/)
        encoder.text_token match, :delimiter
        encoder.end_group state
        state = :initial
        string_delimiter = nil
      elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        # Inside '...' only \\ and \' are emitted as :char.
        if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
          encoder.text_token match, :content
        else
          encoder.text_token match, :char
        end
      elsif match = scan(/\\./m)
        encoder.text_token match, :content
      elsif match = scan(/ \\ | $ /x)
        encoder.end_group state
        state = :initial
        encoder.text_token match, :error
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
      end

    else
      raise_inspect 'Unknown state', encoder

    end

    last_token_dot = match == '.'

  end

  # Close a string that was still open at the end of the input.
  if state == :string
    encoder.end_group state
  end

  encoder
end
end
end
end

View File

@ -0,0 +1,213 @@
module CodeRay
module Scanners
# Scanner for JavaScript.
#
# Aliases: +ecmascript+, +ecma_script+, +javascript+
class JavaScript < Scanner
register_for :java_script
file_extension 'js'
# The actual JavaScript keywords.
KEYWORDS = %w[
break case catch continue default delete do else
finally for function if in instanceof new
return switch throw try typeof var void while with
] # :nodoc:
PREDEFINED_CONSTANTS = %w[
false null true undefined NaN Infinity
] # :nodoc:
MAGIC_VARIABLES = %w[ this arguments ] # :nodoc: arguments was introduced in JavaScript 1.4
KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
case delete in instanceof new return throw typeof with
] # :nodoc:
# Reserved for future use.
RESERVED_WORDS = %w[
abstract boolean byte char class debugger double enum export extends
final float goto implements import int interface long native package
private protected public short static super synchronized throws transient
volatile
] # :nodoc:
IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_CONSTANTS, :predefined_constant).
add(MAGIC_VARIABLES, :local_variable).
add(KEYWORDS, :keyword) # :nodoc:
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
REGEXP_ESCAPE = / [bBdDsSwW] /x # :nodoc:
STRING_CONTENT_PATTERN = {
"'" => /[^\\']+/,
'"' => /[^\\"]+/,
'/' => /[^\\\/]+/,
} # :nodoc:
KEY_CHECK_PATTERN = {
"'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
'"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
} # :nodoc:
protected
# Tokenizes the remaining input as JavaScript and sends every token to
# +encoder+. +options+ is not read by this method. Returns +encoder+.
#
# States: :initial for ordinary code; :string, :regexp and :key while
# inside a quoted string, a regexp literal or a quoted object key. The
# opening delimiter is kept in +string_delimiter+.
#
# Flags carried from token to token:
#   value_expected     a value may start here, so "/" opens a regexp
#                      rather than being the division operator, and
#                      "<name" may start an embedded XML literal
#   key_expected       the last operator ended in "{" or ","; an
#                      identifier or quoted string followed by ":" is
#                      then treated as an object key
#   function_expected  the previous token was the keyword "function";
#                      the next plain identifier is emitted as :function
def scan_tokens encoder, options
state = :initial
string_delimiter = nil
value_expected = true
key_expected = false
function_expected = false
until eos?
case state
when :initial
if match = scan(/ \s+ | \\\n /x)
# A line break allows a value (and therefore a regexp) to start again.
value_expected = true if !value_expected && match.index(?\n)
encoder.text_token match, :space
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
value_expected = true
encoder.text_token match, :comment
elsif check(/\.?\d/)
# Numbers: tried in the order hex, octal, float, integer.
key_expected = value_expected = false
if match = scan(/0[xX][0-9A-Fa-f]+/)
encoder.text_token match, :hex
elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
encoder.text_token match, :octal
elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
encoder.text_token match, :float
elsif match = scan(/\d+/)
encoder.text_token match, :integer
end
elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
# An XML literal in value position: either a self-closing tag or
# everything up to the first matching closing tag. The whole match is
# delegated to the XML scanner.
# TODO: scan over nested tags
xml_scanner.tokenize match, :tokens => encoder
value_expected = false
next
elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
value_expected = true
# Only the last character of an operator run decides whether an
# object key may follow.
last_operator = match[-1]
key_expected = (last_operator == ?{) || (last_operator == ?,)
function_expected = false
encoder.text_token match, :operator
elsif match = scan(/ [)\]}]+ /x)
function_expected = key_expected = value_expected = false
encoder.text_token match, :operator
elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
kind = IDENT_KIND[match]
value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
# TODO: labels
# Refine plain identifiers; the first matching rule wins.
if kind == :ident
if match.index(?$) # $ allowed inside an identifier
kind = :predefined
elsif function_expected
kind = :function
elsif check(/\s*[=:]\s*function\b/)
kind = :function
elsif key_expected && check(/\s*:/)
kind = :key
end
end
function_expected = (kind == :keyword) && (match == 'function')
key_expected = false
encoder.text_token match, kind
elsif match = scan(/["']/)
# A quoted string is a :key only if the look-ahead finds its closing
# quote followed by ":".
if key_expected && check(KEY_CHECK_PATTERN[match])
state = :key
else
state = :string
end
encoder.begin_group state
string_delimiter = match
encoder.text_token match, :delimiter
elsif value_expected && (match = scan(/\//))
# "/" in value position opens a regexp literal ...
encoder.begin_group :regexp
state = :regexp
string_delimiter = '/'
encoder.text_token match, :delimiter
elsif match = scan(/ \/ /x)
# ... anywhere else it is the division operator.
value_expected = true
key_expected = false
encoder.text_token match, :operator
else
encoder.text_token getch, :error
end
when :string, :regexp, :key
if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
encoder.text_token match, :content
elsif match = scan(/["'\/]/)
# The content pattern above consumes every character except a
# backslash and the current delimiter, so a quote or slash reached
# here is the closing delimiter.
encoder.text_token match, :delimiter
if state == :regexp
modifiers = scan(/[gim]+/)
encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
end
encoder.end_group state
string_delimiter = nil
key_expected = value_expected = false
state = :initial
elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
# Inside '...' only \\ and \' are emitted as :char.
if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
encoder.text_token match, :content
else
encoder.text_token match, :char
end
elsif state == :regexp && match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
encoder.text_token match, :char
elsif match = scan(/\\./m)
# Any other backslash sequence is plain content.
encoder.text_token match, :content
elsif match = scan(/ \\ | $ /x)
# A lone backslash: close the group and flag it as an error.
encoder.end_group state
encoder.text_token match, :error
key_expected = value_expected = false
state = :initial
else
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
end
else
raise_inspect 'Unknown state', encoder
end
end
# Close a string or regexp that was still open at the end of the input.
if [:string, :regexp].include? state
encoder.end_group state
end
encoder
end
protected
# Resets this scanner through the superclass and, if the embedded XML
# scanner has already been created, resets that one as well.
def reset_instance
  super()
  if defined?(@xml_scanner)
    @xml_scanner.reset
  end
end
# Returns the scanner used for XML literals embedded in JavaScript.
# It is created on the first call, writing into this scanner's @tokens,
# and the same instance is returned on every later call.
def xml_scanner
  return @xml_scanner if @xml_scanner

  @xml_scanner = CodeRay.scanner(:xml,
    :tokens => @tokens,
    :keep_tokens => true,
    :keep_state => false)
end
end
end
end

View File

@ -0,0 +1,95 @@
module CodeRay
module Scanners
# Scanner for JSON (JavaScript Object Notation).
class JSON < Scanner
register_for :json
file_extension 'json'
KINDS_NOT_LOC = [
:float, :char, :content, :delimiter,
:error, :integer, :operator, :value,
] # :nodoc:
ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
protected
# See http://json.org/ for a definition of the JSON lexic/grammar.
# Tokenizes the remaining input as JSON and sends every token to
# +encoder+. +options+ is not read by this method. Returns +encoder+.
#
# States: :initial outside of strings; :string / :key inside a quoted
# string, depending on whether it is an object key.
#
# +stack+ records the open containers (:object / :array) and
# +key_expected+ tells whether the next string is an object key. A key
# can only follow "{" or a "," directly inside an object, so the flag is
# recomputed on "," and cleared when a container is closed. Without
# that, the flag set by "{" survives an empty object and the string in
# [{}, "x"] is emitted as a key.
def scan_tokens encoder, options

  state = :initial
  stack = []
  key_expected = false

  until eos?

    case state

    when :initial
      if match = scan(/ \s+ /x)
        encoder.text_token match, :space
      elsif match = scan(/"/)
        state = key_expected ? :key : :string
        encoder.begin_group state
        encoder.text_token match, :delimiter
      elsif match = scan(/ [:,\[{\]}] /x)
        encoder.text_token match, :operator
        case match
        when ':'
          key_expected = false
        when ','
          # Keys follow commas in objects only, never in arrays.
          key_expected = stack.last == :object
        when '{'
          stack << :object
          key_expected = true
        when '['
          stack << :array
        when '}', ']'
          stack.pop  # no error recovery, but works for valid JSON
          # After a closing bracket only "," or another closer can follow.
          key_expected = false
        end
      elsif match = scan(/ true | false | null /x)
        encoder.text_token match, :value
      elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
        # Integer part first; a fraction and/or exponent makes it a float.
        if scan(/ \.\d+ (?:[eE][-+]?\d+)? | [eE][-+]? \d+ /x)
          match << matched
          encoder.text_token match, :float
        else
          encoder.text_token match, :integer
        end
      else
        encoder.text_token getch, :error
      end

    when :string, :key
      if match = scan(/[^\\"]+/)
        encoder.text_token match, :content
      elsif match = scan(/"/)
        encoder.text_token match, :delimiter
        encoder.end_group state
        state = :initial
      elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        encoder.text_token match, :char
      elsif match = scan(/\\./m)
        # Backslash sequences that JSON does not define stay plain content.
        encoder.text_token match, :content
      elsif match = scan(/ \\ | $ /x)
        encoder.end_group state
        encoder.text_token match, :error
        state = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
      end

    else
      raise_inspect 'Unknown state: %p' % [state], encoder

    end
  end

  # Close a string or key that was still open at the end of the input.
  if [:string, :key].include? state
    encoder.end_group state
  end

  encoder
end
end
end
end

View File

@ -0,0 +1,509 @@
module CodeRay
module Scanners
load :html
# Scanner for PHP.
#
# Original by Stefan Walk.
class PHP < Scanner
register_for :php
file_extension 'php'
encoding 'BINARY'
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
protected
# Creates the HTML scanner that is kept in @html_scanner. It writes
# into this scanner's @tokens and is asked to keep its state between
# calls (:keep_state => true).
def setup
  @html_scanner = CodeRay.scanner(:html,
    :tokens => @tokens,
    :keep_tokens => true,
    :keep_state => true)
end
# Resets this scanner through the superclass, then resets the HTML
# scanner held in @html_scanner.
def reset_instance
  super()
  @html_scanner.reset
end
# Word lists used to classify PHP identifiers and variables.
#
# IDENT_KIND maps an identifier (ignoring case) to a token kind and
# falls back to :ident; VARIABLE_KIND maps a variable name including
# its "$" to :predefined for the listed superglobals and falls back to
# :local_variable.
module Words  # :nodoc:

  # according to http://www.php.net/manual/en/reserved.keywords.php
  KEYWORDS = %w[
    abstract and array as break case catch class clone const continue declare default do else elseif
    enddeclare endfor endforeach endif endswitch endwhile extends final for foreach function global
    goto if implements interface instanceof namespace new or private protected public static switch
    throw try use var while xor
    cfunction old_function
  ]

  TYPES = %w[ int integer float double bool boolean string array object resource ]

  LANGUAGE_CONSTRUCTS = %w[
    die echo empty exit eval include include_once isset list
    require require_once return print unset
  ]

  CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ]

  # according to http://php.net/quickref.php on 2009-04-21;
  # all functions with _ excluded (module functions) and selected additional functions
  BUILTIN_FUNCTIONS = %w[
    abs acos acosh addcslashes addslashes aggregate array arsort ascii2ebcdic asin asinh asort assert atan atan2
    atanh basename bcadd bccomp bcdiv bcmod bcmul bcpow bcpowmod bcscale bcsqrt bcsub bin2hex bindec
    bindtextdomain bzclose bzcompress bzdecompress bzerrno bzerror bzerrstr bzflush bzopen bzread bzwrite
    calculhmac ceil chdir checkdate checkdnsrr chgrp chmod chop chown chr chroot clearstatcache closedir closelog
    compact constant copy cos cosh count crc32 crypt current date dcgettext dcngettext deaggregate decbin dechex
    decoct define defined deg2rad delete dgettext die dirname diskfreespace dl dngettext doubleval each
    ebcdic2ascii echo empty end ereg eregi escapeshellarg escapeshellcmd eval exec exit exp explode expm1 extract
    fclose feof fflush fgetc fgetcsv fgets fgetss file fileatime filectime filegroup fileinode filemtime fileowner
    fileperms filepro filesize filetype floatval flock floor flush fmod fnmatch fopen fpassthru fprintf fputcsv
    fputs fread frenchtojd fscanf fseek fsockopen fstat ftell ftok ftruncate fwrite getallheaders getcwd getdate
    getenv gethostbyaddr gethostbyname gethostbynamel getimagesize getlastmod getmxrr getmygid getmyinode getmypid
    getmyuid getopt getprotobyname getprotobynumber getrandmax getrusage getservbyname getservbyport gettext
    gettimeofday gettype glob gmdate gmmktime gmstrftime gregoriantojd gzclose gzcompress gzdecode gzdeflate
    gzencode gzeof gzfile gzgetc gzgets gzgetss gzinflate gzopen gzpassthru gzputs gzread gzrewind gzseek gztell
    gzuncompress gzwrite hash header hebrev hebrevc hexdec htmlentities htmlspecialchars hypot iconv idate
    implode include intval ip2long iptcembed iptcparse isset
    jddayofweek jdmonthname jdtofrench jdtogregorian jdtojewish jdtojulian jdtounix jewishtojd join jpeg2wbmp
    juliantojd key krsort ksort lcfirst lchgrp lchown levenshtein link linkinfo list localeconv localtime log
    log10 log1p long2ip lstat ltrim mail main max md5 metaphone mhash microtime min mkdir mktime msql natcasesort
    natsort next ngettext nl2br nthmac octdec opendir openlog
    ord overload pack passthru pathinfo pclose pfsockopen phpcredits phpinfo phpversion pi png2wbmp popen pos pow
    prev print printf putenv quotemeta rad2deg rand range rawurldecode rawurlencode readdir readfile readgzfile
    readline readlink realpath recode rename require reset rewind rewinddir rmdir round rsort rtrim scandir
    serialize setcookie setlocale setrawcookie settype sha1 shuffle signeurlpaiement sin sinh sizeof sleep snmpget
    snmpgetnext snmprealwalk snmpset snmpwalk snmpwalkoid sort soundex split spliti sprintf sqrt srand sscanf stat
    strcasecmp strchr strcmp strcoll strcspn strftime stripcslashes stripos stripslashes stristr strlen
    strnatcasecmp strnatcmp strncasecmp strncmp strpbrk strpos strptime strrchr strrev strripos strrpos strspn
    strstr strtok strtolower strtotime strtoupper strtr strval substr symlink syslog system tan tanh tempnam
    textdomain time tmpfile touch trim uasort ucfirst ucwords uksort umask uniqid unixtojd unlink unpack
    unserialize unset urldecode urlencode usleep usort vfprintf virtual vprintf vsprintf wordwrap
    array_change_key_case array_chunk array_combine array_count_values array_diff array_diff_assoc
    array_diff_key array_diff_uassoc array_diff_ukey array_fill array_fill_keys array_filter array_flip
    array_intersect array_intersect_assoc array_intersect_key array_intersect_uassoc array_intersect_ukey
    array_key_exists array_keys array_map array_merge array_merge_recursive array_multisort array_pad
    array_pop array_product array_push array_rand array_reduce array_reverse array_search array_shift
    array_slice array_splice array_sum array_udiff array_udiff_assoc array_udiff_uassoc array_uintersect
    array_uintersect_assoc array_uintersect_uassoc array_unique array_unshift array_values array_walk
    array_walk_recursive
    assert_options base_convert base64_decode base64_encode
    chunk_split class_exists class_implements class_parents
    count_chars debug_backtrace debug_print_backtrace debug_zval_dump
    error_get_last error_log error_reporting extension_loaded
    file_exists file_get_contents file_put_contents load_file
    func_get_arg func_get_args func_num_args function_exists
    get_browser get_called_class get_cfg_var get_class get_class_methods get_class_vars
    get_current_user get_declared_classes get_declared_interfaces get_defined_constants
    get_defined_functions get_defined_vars get_extension_funcs get_headers get_html_translation_table
    get_include_path get_included_files get_loaded_extensions get_magic_quotes_gpc get_magic_quotes_runtime
    get_meta_tags get_object_vars get_parent_class get_required_files get_resource_type
    gc_collect_cycles gc_disable gc_enable gc_enabled
    halt_compiler headers_list headers_sent highlight_file highlight_string
    html_entity_decode htmlspecialchars_decode
    in_array include_once inclued_get_data
    is_a is_array is_binary is_bool is_buffer is_callable is_dir is_double is_executable is_file is_finite
    is_float is_infinite is_int is_integer is_link is_long is_nan is_null is_numeric is_object is_readable
    is_real is_resource is_scalar is_soap_fault is_string is_subclass_of is_unicode is_uploaded_file
    is_writable is_writeable
    locale_get_default locale_set_default
    number_format override_function parse_str parse_url
    php_check_syntax php_ini_loaded_file php_ini_scanned_files php_logo_guid php_sapi_name
    php_strip_whitespace php_uname
    preg_filter preg_grep preg_last_error preg_match preg_match_all preg_quote preg_replace
    preg_replace_callback preg_split print_r
    require_once register_shutdown_function register_tick_function
    set_error_handler set_exception_handler set_file_buffer set_include_path
    set_magic_quotes_runtime set_time_limit shell_exec
    str_getcsv str_ireplace str_pad str_repeat str_replace str_rot13 str_shuffle str_split str_word_count
    strip_tags substr_compare substr_count substr_replace
    time_nanosleep time_sleep_until
    token_get_all token_name trigger_error
    unregister_tick_function use_soap_error_handler user_error
    utf8_decode utf8_encode var_dump var_export
    version_compare
    zend_logo_guid zend_thread_id zend_version
    create_function call_user_func_array
    posix_access posix_ctermid posix_get_last_error posix_getcwd posix_getegid
    posix_geteuid posix_getgid posix_getgrgid posix_getgrnam posix_getgroups
    posix_getlogin posix_getpgid posix_getpgrp posix_getpid posix_getppid
    posix_getpwnam posix_getpwuid posix_getrlimit posix_getsid posix_getuid
    posix_initgroups posix_isatty posix_kill posix_mkfifo posix_mknod
    posix_setegid posix_seteuid posix_setgid posix_setpgid posix_setsid
    posix_setuid posix_strerror posix_times posix_ttyname posix_uname
    pcntl_alarm pcntl_exec pcntl_fork pcntl_getpriority pcntl_setpriority
    pcntl_signal pcntl_signal_dispatch pcntl_sigprocmask pcntl_sigtimedwait
    pcntl_sigwaitinfo pcntl_wait pcntl_waitpid pcntl_wexitstatus pcntl_wifexited
    pcntl_wifsignaled pcntl_wifstopped pcntl_wstopsig pcntl_wtermsig
  ]
  # TODO: more built-in PHP functions?

  EXCEPTIONS = %w[
    E_ERROR E_WARNING E_PARSE E_NOTICE E_CORE_ERROR E_CORE_WARNING E_COMPILE_ERROR E_COMPILE_WARNING
    E_USER_ERROR E_USER_WARNING E_USER_NOTICE E_DEPRECATED E_USER_DEPRECATED E_ALL E_STRICT
  ]

  CONSTANTS = %w[
    null true false self parent
    __LINE__ __DIR__ __FILE__ __LINE__
    __CLASS__ __NAMESPACE__ __METHOD__ __FUNCTION__
    PHP_VERSION PHP_MAJOR_VERSION PHP_MINOR_VERSION PHP_RELEASE_VERSION PHP_VERSION_ID PHP_EXTRA_VERSION PHP_ZTS
    PHP_DEBUG PHP_MAXPATHLEN PHP_OS PHP_SAPI PHP_EOL PHP_INT_MAX PHP_INT_SIZE DEFAULT_INCLUDE_PATH
    PEAR_INSTALL_DIR PEAR_EXTENSION_DIR PHP_EXTENSION_DIR PHP_PREFIX PHP_BINDIR PHP_LIBDIR PHP_DATADIR
    PHP_SYSCONFDIR PHP_LOCALSTATEDIR PHP_CONFIG_FILE_PATH PHP_CONFIG_FILE_SCAN_DIR PHP_SHLIB_SUFFIX
    PHP_OUTPUT_HANDLER_START PHP_OUTPUT_HANDLER_CONT PHP_OUTPUT_HANDLER_END
    __COMPILER_HALT_OFFSET__
    EXTR_OVERWRITE EXTR_SKIP EXTR_PREFIX_SAME EXTR_PREFIX_ALL EXTR_PREFIX_INVALID EXTR_PREFIX_IF_EXISTS
    EXTR_IF_EXISTS SORT_ASC SORT_DESC SORT_REGULAR SORT_NUMERIC SORT_STRING CASE_LOWER CASE_UPPER COUNT_NORMAL
    COUNT_RECURSIVE ASSERT_ACTIVE ASSERT_CALLBACK ASSERT_BAIL ASSERT_WARNING ASSERT_QUIET_EVAL CONNECTION_ABORTED
    CONNECTION_NORMAL CONNECTION_TIMEOUT INI_USER INI_PERDIR INI_SYSTEM INI_ALL M_E M_LOG2E M_LOG10E M_LN2 M_LN10
    M_PI M_PI_2 M_PI_4 M_1_PI M_2_PI M_2_SQRTPI M_SQRT2 M_SQRT1_2 CRYPT_SALT_LENGTH CRYPT_STD_DES CRYPT_EXT_DES
    CRYPT_MD5 CRYPT_BLOWFISH DIRECTORY_SEPARATOR SEEK_SET SEEK_CUR SEEK_END LOCK_SH LOCK_EX LOCK_UN LOCK_NB
    HTML_SPECIALCHARS HTML_ENTITIES ENT_COMPAT ENT_QUOTES ENT_NOQUOTES INFO_GENERAL INFO_CREDITS
    INFO_CONFIGURATION INFO_MODULES INFO_ENVIRONMENT INFO_VARIABLES INFO_LICENSE INFO_ALL CREDITS_GROUP
    CREDITS_GENERAL CREDITS_SAPI CREDITS_MODULES CREDITS_DOCS CREDITS_FULLPAGE CREDITS_QA CREDITS_ALL STR_PAD_LEFT
    STR_PAD_RIGHT STR_PAD_BOTH PATHINFO_DIRNAME PATHINFO_BASENAME PATHINFO_EXTENSION PATH_SEPARATOR CHAR_MAX
    LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_ALL LC_MESSAGES ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5
    ABDAY_6 ABDAY_7 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
    ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 MON_8 MON_9
    MON_10 MON_11 MON_12 AM_STR PM_STR D_T_FMT D_FMT T_FMT T_FMT_AMPM ERA ERA_YEAR ERA_D_T_FMT ERA_D_FMT ERA_T_FMT
    ALT_DIGITS INT_CURR_SYMBOL CURRENCY_SYMBOL CRNCYSTR MON_DECIMAL_POINT MON_THOUSANDS_SEP MON_GROUPING
    POSITIVE_SIGN NEGATIVE_SIGN INT_FRAC_DIGITS FRAC_DIGITS P_CS_PRECEDES P_SEP_BY_SPACE N_CS_PRECEDES
    N_SEP_BY_SPACE P_SIGN_POSN N_SIGN_POSN DECIMAL_POINT RADIXCHAR THOUSANDS_SEP THOUSEP GROUPING YESEXPR NOEXPR
    YESSTR NOSTR CODESET LOG_EMERG LOG_ALERT LOG_CRIT LOG_ERR LOG_WARNING LOG_NOTICE LOG_INFO LOG_DEBUG LOG_KERN
    LOG_USER LOG_MAIL LOG_DAEMON LOG_AUTH LOG_SYSLOG LOG_LPR LOG_NEWS LOG_UUCP LOG_CRON LOG_AUTHPRIV LOG_LOCAL0
    LOG_LOCAL1 LOG_LOCAL2 LOG_LOCAL3 LOG_LOCAL4 LOG_LOCAL5 LOG_LOCAL6 LOG_LOCAL7 LOG_PID LOG_CONS LOG_ODELAY
    LOG_NDELAY LOG_NOWAIT LOG_PERROR
  ]

  PREDEFINED = %w[
    $GLOBALS $_SERVER $_GET $_POST $_FILES $_REQUEST $_SESSION $_ENV
    $_COOKIE $php_errormsg $HTTP_RAW_POST_DATA $http_response_header
    $argc $argv
  ]

  # Identifier => token kind; lookups ignore case, unknown words are :ident.
  IDENT_KIND = WordList::CaseIgnoring.new(:ident).
    add(KEYWORDS, :keyword).
    add(TYPES, :predefined_type).
    add(LANGUAGE_CONSTRUCTS, :keyword).
    add(BUILTIN_FUNCTIONS, :predefined).
    add(CLASSES, :predefined_constant).
    add(EXCEPTIONS, :exception).
    add(CONSTANTS, :predefined_constant)

  # Variable name (with "$") => token kind; unknown names are :local_variable.
  VARIABLE_KIND = WordList.new(:local_variable).
    add(PREDEFINED, :predefined)
end
# Regular expressions shared by the PHP scanner.
module RE # :nodoc:
# Start of a PHP section: a <script> tag whose language attribute is
# "php" (double- or single-quoted), "<?php" with an optional digit, or a
# short "<?" that is not followed by "xml". Case-insensitive.
PHP_START = /
<script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
<script\s+[^>]*?language\s*=\s*'php'[^>]*?> |
<\?php\d? |
<\?(?!xml)
/xi
# End of a PHP section: "</script>" or "?>". Case-insensitive.
PHP_END = %r!
</script> |
\?>
!xi
# Markup that makes the input look like an HTML document: an HTML
# doctype, or an opening html/body/div/p tag followed by ">" or a space.
HTML_INDICATOR = /<!DOCTYPE html|<(?:html|body|div|p)[> ]/i
# A PHP identifier: a letter, underscore or byte in \x7f-\xFF, followed
# by any number of those or digits. Case-insensitive.
IDENTIFIER = /[a-z_\x7f-\xFF][a-z0-9_\x7f-\xFF]*/i
# A variable: "$" followed by an identifier.
VARIABLE = /\$#{IDENTIFIER}/
# Operators and delimiters; each alternative is described by the inline
# comment next to it.
OPERATOR = /
\.(?!\d)=? | # dot that is not decimal point, string concatenation
&& | \|\| | # logic
:: | -> | => | # scope, member, dictionary
\\(?!\n) | # namespace
\+\+ | -- | # increment, decrement
[,;?:()\[\]{}] | # simple delimiters
[-+*\/%&|^]=? | # ordinary math, binary logic, assignment shortcuts
[~$] | # whatever
=& | # reference assignment
[=!]=?=? | <> | # comparison and assignment
<<=? | >>=? | [<>]=? # comparison and shift
/x
end
protected
# Tokenizes PHP source, optionally embedded in HTML, and feeds the tokens
# to +encoder+.
#
# +states+ is used as a stack; its top entry selects the active mode:
#   :initial           - HTML outside of PHP tags (handed to @html_scanner)
#   :php               - PHP code
#   :sqstring          - single-quoted string, or heredoc opened with a
#                        single-quoted identifier
#   :dqstring          - double-quoted or backtick string, or other heredoc
#   :class_expected    - directly after the +class+ keyword
#   :function_expected - directly after the +function+ keyword
# While a {$...} interpolation inside a :dqstring is scanned as PHP, the
# string's stack entry is replaced by a [state, delimiter] pair so that the
# delimiter can be restored when the closing brace is reached.
#
# +options+ is not read by this method. Returns +encoder+.
def scan_tokens encoder, options
# Decide whether the input starts out as HTML or as bare PHP code.
if check(RE::PHP_START) || # starts with <?
(match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
check(/.{1,100}#{RE::PHP_START}/om) # PHP start after max 100 chars
# is HTML with embedded PHP, so start with HTML
states = [:initial]
else
# is just PHP, so start with PHP surrounded by HTML
states = [:initial, :php]
end
# label_expected: an identifier followed by a single ":" is a label.
label_expected = true
# case_expected: set after +case+/+default+; the next ":" re-enables labels.
case_expected = false
# heredoc_delimiter: regexp for the closing heredoc identifier, or nil when
# the current string (if any) is not a heredoc.
heredoc_delimiter = nil
# delimiter: opening quote character of the current quoted string.
delimiter = nil
# modifier: pending "b" binary-string prefix, emitted with the next string.
modifier = nil
until eos?
case states.last
when :initial # HTML
if match = scan(RE::PHP_START)
encoder.text_token match, :inline_delimiter
label_expected = true
states << :php
else
# Everything up to the next PHP start tag (or the rest of the input)
# is handed to the HTML scanner.
match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
@html_scanner.tokenize match unless match.empty?
end
when :php
if match = scan(/\s+/)
encoder.text_token match, :space
# Block comment (possibly unterminated), or a line comment that stops at
# the end of the line or before a PHP end tag.
elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
encoder.text_token match, :comment
elsif match = scan(RE::IDENTIFIER)
kind = Words::IDENT_KIND[match]
if kind == :ident && label_expected && check(/:(?!:)/)
kind = :label
label_expected = true
else
label_expected = false
if kind == :ident && match =~ /^[A-Z]/
kind = :constant
elsif kind == :keyword
case match
when 'class'
states << :class_expected
when 'function'
states << :function_expected
when 'case', 'default'
case_expected = true
end
elsif match == 'b' && check(/['"]/) # binary string literal
# Hold the prefix back; it is emitted inside the string group.
modifier = match
next
end
end
encoder.text_token match, kind
elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
label_expected = false
encoder.text_token match, :float
elsif match = scan(/0x[0-9a-fA-F]+/)
label_expected = false
encoder.text_token match, :hex
elsif match = scan(/\d+/)
label_expected = false
encoder.text_token match, :integer
elsif match = scan(/['"`]/)
encoder.begin_group :string
if modifier
encoder.text_token modifier, :modifier
modifier = nil
end
delimiter = match
encoder.text_token match, :delimiter
states.push match == "'" ? :sqstring : :dqstring
elsif match = scan(RE::VARIABLE)
label_expected = false
encoder.text_token match, Words::VARIABLE_KIND[match]
elsif match = scan(/\{/)
encoder.text_token match, :operator
label_expected = true
# Nested code block: push another :php level to pair with its "}".
states.push :php
elsif match = scan(/\}/)
if states.size == 1
encoder.text_token match, :error
else
states.pop
if states.last.is_a?(::Array)
# This brace closes a {$...} interpolation: restore the string state
# and its delimiter from the saved [state, delimiter] pair.
delimiter = states.last[1]
states[-1] = states.last[0]
encoder.text_token match, :delimiter
encoder.end_group :inline
else
encoder.text_token match, :operator
label_expected = true
end
end
elsif match = scan(/@/)
label_expected = false
encoder.text_token match, :exception
elsif match = scan(RE::PHP_END)
encoder.text_token match, :inline_delimiter
# Leaving PHP discards any nested states.
states = [:initial]
elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
encoder.begin_group :string
# warn 'heredoc in heredoc?' if heredoc_delimiter
heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
encoder.text_token match, :delimiter
# A single-quoted identifier (group 3) selects the :sqstring rules.
states.push self[3] ? :sqstring : :dqstring
# The closing identifier must be followed by an optional ";" and the
# end of the line.
heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
elsif match = scan(/#{RE::OPERATOR}/o)
label_expected = match == ';'
if case_expected
label_expected = true if match == ':'
case_expected = false
end
encoder.text_token match, :operator
else
encoder.text_token getch, :error
end
when :sqstring
if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
encoder.text_token match, :content
elsif !heredoc_delimiter && match = scan(/'/)
encoder.text_token match, :delimiter
encoder.end_group :string
delimiter = nil
label_expected = false
states.pop
elsif heredoc_delimiter && match = scan(/\n/)
# In a heredoc each newline is checked for a following closing identifier.
if scan heredoc_delimiter
encoder.text_token "\n", :content
encoder.text_token matched, :delimiter
encoder.end_group :string
heredoc_delimiter = nil
label_expected = false
states.pop
else
encoder.text_token match, :content
end
elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
encoder.text_token match, :char
elsif match = scan(/\\./m)
encoder.text_token match, :content
elsif match = scan(/\\/)
encoder.text_token match, :error
else
states.pop
end
when :dqstring
if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
encoder.text_token match, :content
elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
encoder.text_token match, :delimiter
encoder.end_group :string
delimiter = nil
label_expected = false
states.pop
elsif heredoc_delimiter && match = scan(/\n/)
# In a heredoc each newline is checked for a following closing identifier.
if scan heredoc_delimiter
encoder.text_token "\n", :content
encoder.text_token matched, :delimiter
encoder.end_group :string
heredoc_delimiter = nil
label_expected = false
states.pop
else
encoder.text_token match, :content
end
elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
encoder.text_token match, :char
elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
encoder.text_token match, :char
elsif match = scan(/\\./m)
encoder.text_token match, :content
elsif match = scan(/\\/)
encoder.text_token match, :error
elsif match = scan(/#{RE::VARIABLE}/o)
# Simple interpolation: $var, $var[key] or $var->member. A "[" or "->"
# that is not followed by the expected identifier form is flagged :error.
if check(/\[#{RE::IDENTIFIER}\]/o)
encoder.begin_group :inline
encoder.text_token match, :local_variable
encoder.text_token scan(/\[/), :operator
encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
encoder.text_token scan(/\]/), :operator
encoder.end_group :inline
elsif check(/\[/)
match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
encoder.text_token match, :error
elsif check(/->#{RE::IDENTIFIER}/o)
encoder.begin_group :inline
encoder.text_token match, :local_variable
encoder.text_token scan(/->/), :operator
encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
encoder.end_group :inline
elsif check(/->/)
match << scan(/->/)
encoder.text_token match, :error
else
encoder.text_token match, :local_variable
end
elsif match = scan(/\{/)
if check(/\$/)
# "{$" starts a complex interpolation: remember the string state together
# with its delimiter and scan the contents as PHP until the closing "}".
encoder.begin_group :inline
states[-1] = [states.last, delimiter]
delimiter = nil
states.push :php
encoder.text_token match, :delimiter
else
encoder.text_token match, :content
end
elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
encoder.text_token match, :local_variable
elsif match = scan(/\$/)
encoder.text_token match, :content
else
states.pop
end
when :class_expected
# The first identifier after +class+ is the class name; anything other
# than whitespace or an identifier ends this state without consuming input.
if match = scan(/\s+/)
encoder.text_token match, :space
elsif match = scan(/#{RE::IDENTIFIER}/o)
encoder.text_token match, :class
states.pop
else
states.pop
end
when :function_expected
# The first identifier after +function+ (with an optional "&" before it)
# is the function name.
if match = scan(/\s+/)
encoder.text_token match, :space
elsif match = scan(/&/)
encoder.text_token match, :operator
elsif match = scan(/#{RE::IDENTIFIER}/o)
encoder.text_token match, :function
states.pop
else
states.pop
end
else
raise_inspect 'Unknown state!', encoder, states
end
end
encoder
end
end
end
end

View File

@ -0,0 +1,287 @@
module CodeRay
module Scanners
# Scanner for Python. Supports Python 3.
#
# Based on pygments' PythonLexer, see
# http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
class Python < Scanner
register_for :python
file_extension 'py'
# Reserved words; registered as :keyword in IDENT_KIND.
KEYWORDS = [
'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
'del', 'elif', 'else', 'except', 'finally', 'for',
'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
'nonlocal', # new in Python 3
] # :nodoc:
# Words registered as :old_keyword; scan_tokens reports them as :ident when
# directly followed by "(" and as :keyword otherwise.
OLD_KEYWORDS = [
'exec', 'print', # gone in Python 3
] # :nodoc:
# Built-in functions and types; registered as :predefined.
PREDEFINED_METHODS_AND_TYPES = %w[
__import__ abs all any apply basestring bin bool buffer
bytearray bytes callable chr classmethod cmp coerce compile
complex delattr dict dir divmod enumerate eval execfile exit
file filter float frozenset getattr globals hasattr hash hex id
input int intern isinstance issubclass iter len list locals
long map max min next object oct open ord pow property range
raw_input reduce reload repr reversed round set setattr slice
sorted staticmethod str sum super tuple type unichr unicode
vars xrange zip
] # :nodoc:
# Built-in exception and warning names; registered as :exception.
PREDEFINED_EXCEPTIONS = %w[
ArithmeticError AssertionError AttributeError
BaseException DeprecationWarning EOFError EnvironmentError
Exception FloatingPointError FutureWarning GeneratorExit IOError
ImportError ImportWarning IndentationError IndexError KeyError
KeyboardInterrupt LookupError MemoryError NameError
NotImplemented NotImplementedError OSError OverflowError
OverflowWarning PendingDeprecationWarning ReferenceError
RuntimeError RuntimeWarning StandardError StopIteration
SyntaxError SyntaxWarning SystemError SystemExit TabError
TypeError UnboundLocalError UnicodeDecodeError
UnicodeEncodeError UnicodeError UnicodeTranslateError
UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
] # :nodoc:
# Built-in constants and conventional names; registered as
# :predefined_constant.
PREDEFINED_VARIABLES_AND_CONSTANTS = [
'False', 'True', 'None', # "keywords" since Python 3
'self', 'Ellipsis', 'NotImplemented',
] # :nodoc:
# Maps a word to its token kind; words in none of the lists map to :ident.
# NOTE(review): 'NotImplemented' appears both in the constants list and in
# PREDEFINED_EXCEPTIONS; which kind it ends up with depends on how
# WordList#add treats repeated words - confirm against WordList.
IDENT_KIND = WordList.new(:ident).
add(KEYWORDS, :keyword).
add(OLD_KEYWORDS, :old_keyword).
add(PREDEFINED_METHODS_AND_TYPES, :predefined).
add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
add(PREDEFINED_EXCEPTIONS, :exception) # :nodoc:
# A name: one word character that is not a digit, then any word characters.
NAME = / [^\W\d] \w* /x # :nodoc:
# What may follow a backslash to form a simple escape sequence.
ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
# What may follow a backslash to form a \u, \U or \N{...} escape sequence.
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x # :nodoc:
# Operators and punctuation; every alternative is annotated inline.
OPERATOR = /
\.\.\. | # ellipsis
\.(?!\d) | # dot but not decimal point
[,;:()\[\]{}] | # simple delimiters
\/\/=? | \*\*=? | # special math
[-+*\/%&|^]=? | # ordinary math and binary logic
[~`] | # binary complement and inspection
<<=? | >>=? | [<>=]=? | != # comparison and assignment
/x # :nodoc:
# Cache filled on first access: string delimiter => regexp matching exactly
# that delimiter.
STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter|
h[delimiter] = Regexp.union delimiter # :nodoc:
}
# Cache filled on first access: string delimiter => regexp matching a run of
# string content up to (not including) the next backslash, end of line or
# occurrence of the delimiter.
STRING_CONTENT_REGEXP = Hash.new { |h, delimiter|
h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x # :nodoc:
}
# State that scan_tokens enters after a keyword; :initial for every keyword
# not listed here.
DEF_NEW_STATE = WordList.new(:initial).
add(%w(def), :def_expected).
add(%w(import from), :include_expected).
add(%w(class), :class_expected) # :nodoc:
# What scan_tokens accepts inside an import statement: a dotted name or "*".
DESCRIPTOR = /
#{NAME}
(?: \. #{NAME} )*
| \*
/x # :nodoc:
# Optional indentation, an optional u/r prefix and a triple quote; used to
# decide whether the next string is reported as a :docstring.
DOCSTRING_COMING = /
[ \t]* u?r? ("""|''')
/x # :nodoc:
protected
# Tokenizes Python source and feeds the tokens to +encoder+.
#
# The scanner is a small state machine; +state+ is one of:
#   :initial          - ordinary code
#   :string           - inside a string or docstring literal
#   :def_expected     - directly after +def+; the next name is a method name
#   :class_expected   - directly after +class+; the next name is a class name
#   :include_expected - inside an +import+ / +from ... import+ statement
#
# Whitespace and comments are handled in every non-string state.
#
# +options+ is not read by this method. Returns +encoder+.
def scan_tokens encoder, options
  state = :initial
  string_delimiter = nil   # opening quote(s) of the current string
  string_raw = false       # true inside r"..." strings: no simple escapes
  string_type = nil        # :string or :docstring while a group is open
  docstring_coming = match?(/#{DOCSTRING_COMING}/o)
  last_token_dot = false   # a name right after "." is always an :ident
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
  from_import_state = []   # stack of :from / :import / :as markers

  until eos?

    if state == :string
      if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
        encoder.text_token match, :delimiter
        encoder.end_group string_type
        string_type = nil
        state = :initial
        next
      elsif string_delimiter.size == 3 && match = scan(/\n/)
        # Only triple-quoted strings may span lines.
        encoder.text_token match, :content
      elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
        encoder.text_token match, :content
      elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ . /x)
        encoder.text_token match, :content
      elsif match = scan(/ \\ | $ /x)
        # Unterminated string: a lone backslash, or the end of the line in a
        # single-line string. The "$" alternative matches the empty string,
        # so only emit an :error token when there is text to report.
        encoder.end_group string_type
        string_type = nil
        encoder.text_token match, :error unless match.empty?
        state = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
      end

    elsif match = scan(/ [ \t]+ | \\?\n /x)
      encoder.text_token match, :space
      if match == "\n"
        # An import statement ends with the line.
        state = :initial if state == :include_expected
        docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
      end
      next

    elsif match = scan(/ \# [^\n]* /mx)
      encoder.text_token match, :comment
      next

    elsif state == :initial

      if match = scan(/#{OPERATOR}/o)
        encoder.text_token match, :operator

      elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
        string_delimiter = self[2]
        string_type = docstring_coming ? :docstring : :string
        docstring_coming = false
        encoder.begin_group string_type
        string_raw = false
        modifiers = self[1].to_s
        unless modifiers.empty?
          # The prefix is matched case-insensitively, so rawness has to be
          # detected case-insensitively as well (R"..." is a raw string).
          string_raw = !!(modifiers =~ /r/i)
          encoder.text_token modifiers, :modifier
          match = string_delimiter
        end
        state = :string
        encoder.text_token match, :delimiter

      # TODO: backticks

      elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        kind = IDENT_KIND[match]
        # TODO: keyword arguments
        kind = :ident if last_token_dot
        if kind == :old_keyword
          # exec/print used like a function call is an ordinary name.
          kind = check(/\(/) ? :ident : :keyword
        elsif kind == :predefined && check(/ *=/)
          # A builtin name that is being assigned to is an ordinary name.
          kind = :ident
        elsif kind == :keyword
          state = DEF_NEW_STATE[match]
          from_import_state << match.to_sym if state == :include_expected
        end
        encoder.text_token match, kind

      elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
        encoder.text_token match, :decorator

      elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
        encoder.text_token match, :hex

      elsif match = scan(/0[bB][01]+[lL]?/)
        encoder.text_token match, :binary

      elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
        if scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :float
        end

      elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
        encoder.text_token match, :octal

      elsif match = scan(/\d+([lL])?/)
        # A "j" suffix is only accepted when there was no "l"/"L" suffix.
        if self[1] == nil && scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :integer
        end

      else
        encoder.text_token getch, :error

      end

    elsif state == :def_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        encoder.text_token match, :method
      else
        next
      end

    elsif state == :class_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        encoder.text_token match, :class
      else
        next
      end

    elsif state == :include_expected
      if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
        if match == 'as'
          encoder.text_token match, :keyword
          from_import_state << :as
        elsif from_import_state.first == :from && match == 'import'
          encoder.text_token match, :keyword
          from_import_state << :import
        elsif from_import_state.last == :as
          # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
          encoder.text_token match, :ident
          from_import_state.pop
        elsif IDENT_KIND[match] == :keyword
          # Any other keyword ends the import statement; rescan it as code.
          unscan
          match = nil
          state = :initial
          next
        else
          encoder.text_token match, :include
        end
      elsif match = scan(/,/)
        from_import_state.pop if from_import_state.last == :as
        encoder.text_token match, :operator
      else
        from_import_state = []
        state = :initial
        next
      end

    else
      raise_inspect 'Unknown state', encoder, state

    end

    last_token_dot = match == '.'

  end

  # Close a string group that is still open at the end of the input.
  if state == :string
    encoder.end_group string_type
  end

  encoder
end
end
end
end

View File

@ -0,0 +1,66 @@
module CodeRay
module Scanners
# = Debug Scanner
#
# Parses the output of the Encoders::Debug encoder.
class Raydebug < Scanner
register_for :raydebug
file_extension 'raydebug'
title 'CodeRay Token Dump'
protected
# Re-tokenizes a token dump and feeds the tokens to +encoder+.
#
# Three constructs are recognized:
#   kind(text)  - the kind name, the parentheses and the text (emitted with
#                 the kind named in the dump)
#   kind< / kind[ - opens a group of that kind
#   > / ]       - closes the innermost open group
# Any other character is emitted as :space. Groups that are still open at
# the end of the input are closed. +options+ is not read. Returns +encoder+.
def scan_tokens(encoder, options)
  open_groups = []

  until eos?
    if (blank = scan(/\s+/))
      encoder.text_token(blank, :space)

    elsif scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) /x)
      # Capture both groups before the next scan replaces the match data.
      kind_name, content = self[1], self[2]
      encoder.text_token(kind_name, :class)
      encoder.text_token('(', :operator)
      encoder.text_token(content, kind_name.to_sym)
      closing_paren = scan(/\)/)
      encoder.text_token(closing_paren, :operator) if closing_paren

    elsif scan(/ (\w+) ([<\[]) /x)
      kind_name, bracket = self[1], self[2]
      # Both "<" and "[" are handled the same way.
      encoder.text_token(kind_name, :class)
      group_kind = kind_name.to_sym
      open_groups.push(group_kind)
      encoder.begin_group(group_kind)
      encoder.text_token(bracket, :operator)

    elsif !open_groups.empty? && (closer = scan(/ [>\]] /x))
      encoder.text_token(closer, :operator)
      encoder.end_group(open_groups.pop)

    else
      encoder.text_token(getch, :space)
    end
  end

  encoder.end_group(open_groups.pop) until open_groups.empty?
  encoder
end
end
end
end

View File

@ -0,0 +1,461 @@
module CodeRay
module Scanners
# This scanner is really complex, since Ruby _is_ a complex language!
#
# It tries to highlight 100% of all common code,
# and 90% of strange codes.
#
# It is optimized for HTML highlighting, and is not very useful for
# parsing or pretty printing.
class Ruby < Scanner
register_for :ruby
file_extension 'rb'
autoload :Patterns, 'coderay/scanners/ruby/patterns'
autoload :StringState, 'coderay/scanners/ruby/string_state'
# Builds a new StringState from the same arguments that scan_tokens uses
# when it opens a double-quoted string: type :string, flag true and the
# delimiter '"'.
def interpreted_string_state
  StringState.new(:string, true, '"')
end
protected
# Sets @state to :initial, the state scan_tokens starts from when
# options[:state] is not given.
def setup
@state = :initial
end
# Tokenizes Ruby source and feeds the tokens to +encoder+.
#
# +state+ is either a Symbol naming the current code state (:initial,
# :def_expected, :dot_expected, :module_expected, :undef_expected,
# :undef_comma_expected, :alias_expected) or a StringState instance while
# the scanner is inside a string-like literal.
#
# Options read by this method:
#   :state      - state (optionally paired with pending heredocs) to start
#                 from; falls back to @state
#   :keep_state - when set, the final state and pending heredocs are stored
#                 in @state
#
# Returns +encoder+.
def scan_tokens encoder, options
state, heredocs = options[:state] || @state
heredocs = heredocs.dup if heredocs.is_a?(Array)
# Resuming inside a string: reopen its group.
if state && state.instance_of?(StringState)
encoder.begin_group state.type
end
# last_state: state to return to after a "#$var" / "#@var" inside a string.
last_state = nil
# method_call_expected: "." or "::" was just seen (holds the operator text).
method_call_expected = false
# value_expected: a value may start here, so "/", "%", "?" and "<<" are
# tried as the start of a literal before being treated as operators.
value_expected = true
# inline_block_stack: saved [state, curly depth, heredocs] for each open
# "#{" interpolation.
inline_block_stack = nil
inline_block_curly_depth = 0
# Resuming with pending heredocs: continue with the first one.
if heredocs
state = heredocs.shift
encoder.begin_group state.type
heredocs = nil if heredocs.empty?
end
# def_object_stack = nil
# def_object_paren_depth = 0
patterns = Patterns # avoid constant lookup
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
until eos?
if state.instance_of? ::Symbol
if match = scan(/[ \t\f\v]+/)
encoder.text_token match, :space
elsif match = scan(/\n/)
if heredocs
unscan # heredoc scanning needs \n at start
state = heredocs.shift
encoder.begin_group state.type
heredocs = nil if heredocs.empty?
else
state = :initial if state == :undef_comma_expected
encoder.text_token match, :space
value_expected = true
end
# At the beginning of a line a comment may also be a "#!" line (reported
# as :doctype) or whatever RUBYDOC_OR_DATA matches.
elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
encoder.text_token match, self[1] ? :doctype : :comment
elsif match = scan(/\\\n/)
if heredocs
unscan # heredoc scanning needs \n at start
encoder.text_token scan(/\\/), :space
state = heredocs.shift
encoder.begin_group state.type
heredocs = nil if heredocs.empty?
else
encoder.text_token match, :space
end
elsif state == :initial
# IDENTS #
if !method_call_expected &&
match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
/#{patterns::METHOD_NAME}/o)
value_expected = false
kind = patterns::IDENT_KIND[match]
if kind == :ident
# A capitalized name is a constant unless it ends in "!"/"?" or is
# directly followed by "(".
if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
kind = :constant
end
elsif kind == :keyword
state = patterns::KEYWORD_NEW_STATE[match]
value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
end
value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
encoder.text_token match, kind
elsif method_call_expected &&
match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
/#{patterns::METHOD_AFTER_DOT}/o)
if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
encoder.text_token match, :constant
else
encoder.text_token match, :ident
end
method_call_expected = false
value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
# OPERATORS #
# Group 1 is set for "." and "::"; the empty group 2 is set for operators
# and opening brackets after which a value is expected.
elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
method_call_expected = self[1]
value_expected = !method_call_expected && self[2]
if inline_block_stack
case match
when '{'
inline_block_curly_depth += 1
when '}'
inline_block_curly_depth -= 1
if inline_block_curly_depth == 0 # closing brace of inline block reached
state, inline_block_curly_depth, heredocs = inline_block_stack.pop
inline_block_stack = nil if inline_block_stack.empty?
heredocs = nil if heredocs && heredocs.empty?
encoder.text_token match, :inline_delimiter
encoder.end_group :inline
next
end
end
end
encoder.text_token match, :operator
elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
/#{patterns::SYMBOL}/o)
case delim = match[1]
when ?', ?"
# Quoted symbol: the rest is scanned like a string of type :symbol.
encoder.begin_group :symbol
encoder.text_token ':', :symbol
match = delim.chr
encoder.text_token match, :delimiter
state = self.class::StringState.new :symbol, delim == ?", match
else
encoder.text_token match, :symbol
value_expected = false
end
# A lone quote, or a complete simple string that contains no backslash
# (and, for double quotes, no "#").
elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
encoder.begin_group :string
if match.size == 1
encoder.text_token match, :delimiter
state = self.class::StringState.new :string, match == '"', match # important for streaming
else
encoder.text_token match[0,1], :delimiter
encoder.text_token match[1..-2], :content if match.size > 2
encoder.text_token match[-1,1], :delimiter
encoder.end_group :string
value_expected = false
end
elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
/#{patterns::INSTANCE_VARIABLE}/o)
value_expected = false
encoder.text_token match, :instance_variable
elsif value_expected && match = scan(/\//)
encoder.begin_group :regexp
encoder.text_token match, :delimiter
state = self.class::StringState.new :regexp, true, '/'
elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
if method_call_expected
encoder.text_token match, :error
method_call_expected = false
else
encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
end
value_expected = false
elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
value_expected = true
encoder.text_token match, :operator
elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
quote = self[3]
delim = self[quote ? 4 : 2]
kind = patterns::QUOTE_TO_TYPE[quote]
encoder.begin_group kind
encoder.text_token match, :delimiter
encoder.end_group kind
# The heredoc body is scanned once the end of the line is reached.
heredocs ||= [] # create heredocs if empty
heredocs << self.class::StringState.new(kind, quote != "'", delim,
self[1] == '-' ? :indented : :linestart)
value_expected = false
elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
kind = patterns::FANCY_STRING_KIND[self[1]]
encoder.begin_group kind
state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
encoder.text_token match, :delimiter
elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
value_expected = false
encoder.text_token match, :integer
elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
value_expected = true
encoder.text_token match, :operator
elsif match = scan(/`/)
encoder.begin_group :shell
encoder.text_token match, :delimiter
state = self.class::StringState.new :shell, true, match
elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
/#{patterns::GLOBAL_VARIABLE}/o)
encoder.text_token match, :global_variable
value_expected = false
elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
/#{patterns::CLASS_VARIABLE}/o)
encoder.text_token match, :class_variable
value_expected = false
elsif match = scan(/\\\z/)
encoder.text_token match, :space
else
# Nothing matched. First retry without the method-call expectation, then
# (if not already in unicode mode) retry with unicode patterns when the
# next character is multi-byte, and only then report an :error.
if method_call_expected
method_call_expected = false
next
end
unless unicode
# check for unicode
$DEBUG_BEFORE, $DEBUG = $DEBUG, false
begin
if check(/./mu).size > 1
# seems like we should try again with unicode
unicode = true
end
rescue
# bad unicode char; use getch
ensure
$DEBUG = $DEBUG_BEFORE
end
next if unicode
end
encoder.text_token getch, :error
end
# After the variable of a "#$var" / "#@var" has been scanned, go back to
# the enclosing string.
if last_state
state = last_state
last_state = nil
end
elsif state == :def_expected
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
encoder.text_token match, :method
state = :initial
else
# Not a plain method name: scan one token as ordinary code, then expect
# "." or "::" followed by the method name.
last_state = :dot_expected
state = :initial
end
elsif state == :dot_expected
if match = scan(/\.|::/)
# invalid definition
state = :def_expected
encoder.text_token match, :operator
else
state = :initial
end
elsif state == :module_expected
if match = scan(/<</)
encoder.text_token match, :operator
else
state = :initial
if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
encoder.text_token match, :class
end
end
elsif state == :undef_expected
state = :undef_comma_expected
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
encoder.text_token match, :method
elsif match = scan(/#{patterns::SYMBOL}/o)
case delim = match[1]
when ?', ?"
encoder.begin_group :symbol
encoder.text_token ':', :symbol
match = delim.chr
encoder.text_token match, :delimiter
state = self.class::StringState.new :symbol, delim == ?", match
# Continue the undef list after the quoted symbol ends.
state.next_state = :undef_comma_expected
else
encoder.text_token match, :symbol
end
else
state = :initial
end
elsif state == :undef_comma_expected
if match = scan(/,/)
encoder.text_token match, :operator
state = :undef_expected
else
state = :initial
end
elsif state == :alias_expected
match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
if match
encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
encoder.text_token self[2], :space
encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
end
state = :initial
else
#:nocov:
raise_inspect 'Unknown state: %p' % [state], encoder
#:nocov:
end
else # StringState
# Emit plain content up to the next position matched by the state's
# pattern, or the rest of the input.
match = scan_until(state.pattern) || scan_rest
unless match.empty?
encoder.text_token match, :content
break if eos?
end
if state.heredoc && self[1] # end of heredoc
match = getch
match << scan_until(/$/) unless eos?
encoder.text_token match, :delimiter unless match.empty?
encoder.end_group state.type
state = state.next_state
next
end
case match = getch
when state.delim
# With nested parens only the outermost closing delimiter ends the string.
if state.paren_depth
state.paren_depth -= 1
if state.paren_depth > 0
encoder.text_token match, :content
next
end
end
encoder.text_token match, :delimiter
if state.type == :regexp && !eos?
match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
encoder.text_token match, :modifier unless match.empty?
end
encoder.end_group state.type
value_expected = false
state = state.next_state
when '\\'
if state.interpreted
if esc = scan(/#{patterns::ESCAPE}/o)
encoder.text_token match + esc, :char
else
encoder.text_token match, :error
end
else
# Non-interpreted string: only an escaped delimiter or backslash is
# reported as :char.
case esc = getch
when nil
encoder.text_token match, :content
when state.delim, '\\'
encoder.text_token match + esc, :char
else
encoder.text_token match + esc, :content
end
end
when '#'
case peek(1)
when '{'
# Start of "#{...}": save the string state and scan the contents as code.
inline_block_stack ||= []
inline_block_stack << [state, inline_block_curly_depth, heredocs]
value_expected = true
state = :initial
inline_block_curly_depth = 1
encoder.begin_group :inline
encoder.text_token match + getch, :inline_delimiter
when '$', '@'
# "#$var" / "#@var": scan one token as code, then return to the string.
encoder.text_token match, :escape
last_state = state
state = :initial
else
#:nocov:
raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
#:nocov:
end
when state.opening_paren
state.paren_depth += 1
encoder.text_token match, :content
else
#:nocov:
raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
#:nocov:
end
end
end
# cleaning up
if state.is_a? StringState
encoder.end_group state.type
end
if options[:keep_state]
# An unfinished heredoc is put back in front of the pending heredocs.
if state.is_a?(StringState) && state.heredoc
(heredocs ||= []).unshift state
state = :initial
elsif heredocs && heredocs.empty?
heredocs = nil
end
@state = state, heredocs
end
# Close every interpolation that is still open, together with the string
# that contains it.
if inline_block_stack
until inline_block_stack.empty?
state, = *inline_block_stack.pop
encoder.end_group :inline
encoder.end_group state.type
end
end
encoder
end
end
end
end

View File

@ -0,0 +1,174 @@
module CodeRay module Scanners
# by Josh Goebel
class SQL < Scanner
register_for :sql
# Words registered as :keyword in IDENT_KIND.
KEYWORDS = %w(
all and any as before begin between by case check collate
each else end exists
for foreign from full group having if in inner is join
like not of on or order outer over references
then to union using values when where
left right distinct
)
# Names of database objects; registered as :type.
OBJECTS = %w(
database databases table tables column columns fields index constraint
constraints transaction function procedure row key view trigger
)
# Statement-level commands; registered as :class.
COMMANDS = %w(
add alter comment create delete drop grant insert into select update set
show prompt begin commit rollback replace truncate
)
# Column and value types; registered as :predefined_type.
PREDEFINED_TYPES = %w(
char varchar varchar2 enum binary text tinytext mediumtext
longtext blob tinyblob mediumblob longblob timestamp
date time datetime year double decimal float int
integer tinyint mediumint bigint smallint unsigned bit
bool boolean hex bin oct
)
# Built-in functions; registered as :predefined.
PREDEFINED_FUNCTIONS = %w( sum cast substring abs pi count min max avg now )
# Modifiers and options; registered as :directive.
DIRECTIVES = %w(
auto_increment unique default charset initially deferred
deferrable cascade immediate read write asc desc after
primary foreign return engine
)
# Literal constants; registered as :predefined_constant.
PREDEFINED_CONSTANTS = %w( null true false )
# Maps a word to its token kind, ignoring case; words in none of the lists
# map to :ident.
# NOTE(review): "begin" is in both KEYWORDS and COMMANDS, and "foreign" is in
# both KEYWORDS and DIRECTIVES; which kind they end up with depends on how
# WordList#add treats repeated words - confirm against WordList.
IDENT_KIND = WordList::CaseIgnoring.new(:ident).
add(KEYWORDS, :keyword).
add(OBJECTS, :type).
add(COMMANDS, :class).
add(PREDEFINED_TYPES, :predefined_type).
add(PREDEFINED_CONSTANTS, :predefined_constant).
add(PREDEFINED_FUNCTIONS, :predefined).
add(DIRECTIVES, :directive)
# What may follow a backslash inside a string; the final "." alternative
# (with /m) accepts any single character.
ESCAPE = / [rbfntv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx
# What may follow a backslash to form a \u or \U escape sequence.
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
# Prefix allowed directly before an opening quote: x, n, b or an underscore
# followed by word characters. Matched case-insensitively.
STRING_PREFIXES = /[xnb]|_\w+/i
# Tokenizes SQL source and feeds the tokens to +encoder+.
#
# Two states are used: :initial (ordinary SQL) and :string (inside a quoted
# string or identifier delimited by ', " or `). Plain string content is
# buffered in +string_content+ and emitted as a single :content token when
# an escape sequence, the closing delimiter, an error or the end of the
# input is reached.
#
# +options+ is not read by this method. Returns +encoder+.
def scan_tokens encoder, options
  state = :initial
  string_type = nil          # opening quote character of the current string
  string_content = ''.dup    # buffered plain content (must be mutable)
  name_expected = false      # set after "." so that the next word is an :ident

  until eos?

    if state == :initial

      if match = scan(/ \s+ | \\\n /x)
        encoder.text_token match, :space

      elsif match = scan(/(?:--\s?|#).*/)
        encoder.text_token match, :comment

      elsif match = scan(%r( /\* (!)? (?: .*? \*/ | .* ) )mx)
        # A block comment that starts with "/*!" is reported as :directive.
        encoder.text_token match, self[1] ? :directive : :comment

      elsif match = scan(/ [*\/=<>:;,!&^|()\[\]{}~%] | [-+\.](?!\d) /x)
        name_expected = true if match == '.' && check(/[A-Za-z_]/)
        encoder.text_token match, :operator

      elsif match = scan(/(#{STRING_PREFIXES})?([`"'])/o)
        prefix = self[1]
        string_type = self[2]
        encoder.begin_group :string
        encoder.text_token prefix, :modifier if prefix
        match = string_type
        state = :string
        encoder.text_token match, :delimiter

      elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
        encoder.text_token match, name_expected ? :ident : (match[0] == ?@ ? :variable : IDENT_KIND[match])
        name_expected = false

      elsif match = scan(/0[xX][0-9A-Fa-f]+/)
        encoder.text_token match, :hex

      elsif match = scan(/0[0-7]+(?![89.eEfF])/)
        encoder.text_token match, :octal

      elsif match = scan(/[-+]?(?>\d+)(?![.eEfF])/)
        encoder.text_token match, :integer

      elsif match = scan(/[-+]?(?:\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+)/)
        encoder.text_token match, :float

      elsif match = scan(/\\N/)
        encoder.text_token match, :predefined_constant

      else
        encoder.text_token getch, :error

      end

    elsif state == :string
      if match = scan(/[^\\"'`]+/)
        string_content << match
        next
      elsif match = scan(/["'`]/)
        if string_type == match
          if peek(1) == string_type  # doubling means escape
            string_content << string_type << getch
            next
          end
          unless string_content.empty?
            encoder.text_token string_content, :content
            string_content = ''.dup
          end
          encoder.text_token match, :delimiter
          encoder.end_group :string
          state = :initial
          string_type = nil
        else
          # A quote of another kind is ordinary content.
          string_content << match
        end
      elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        unless string_content.empty?
          encoder.text_token string_content, :content
          string_content = ''.dup
        end
        encoder.text_token match, :char
      elsif match = scan(/ \\ . /mox)
        string_content << match
        next
      elsif match = scan(/ \\ | $ /x)
        unless string_content.empty?
          encoder.text_token string_content, :content
          string_content = ''.dup
        end
        # The "$" alternative matches the empty string; never emit an empty
        # token. The string group is closed here because the scanner leaves
        # the :string state.
        encoder.text_token match, :error unless match.empty?
        encoder.end_group :string
        state = :initial
        string_type = nil
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
      end

    else
      raise_inspect 'else-case reached', encoder, state

    end

  end

  # Input ended inside a string: emit what is still buffered, then close
  # the group.
  if state == :string
    encoder.text_token string_content, :content unless string_content.empty?
    encoder.end_group :string
  end

  encoder
end
end
end end

View File

@ -0,0 +1,26 @@
module CodeRay
module Scanners
# Scanner for plain text.
#
# Yields just one token of the kind :plain.
#
# Alias: +plaintext+, +plain+
class Text < Scanner
register_for :text
title 'Plain text'
KINDS_NOT_LOC = [:plain] # :nodoc:
protected
# Emits the complete input as one :plain text token.
# +options+ is not read. Returns +encoder+.
def scan_tokens(encoder, options)
  whole_input = string
  encoder.text_token(whole_input, :plain)
  encoder
end
end
end
end

View File

@ -0,0 +1,17 @@
module CodeRay
module Scanners
# XML subclasses HTML, so the HTML scanner is requested before the class
# body. NOTE(review): +load+ is defined outside this file; presumably it is
# the plugin loader of Scanners - confirm.
load :html
# Scanner for XML.
#
# Currently this is the same scanner as Scanners::HTML.
# The class only registers itself under :xml and declares the file
# extension 'xml'; it adds no scanning code of its own.
class XML < HTML
register_for :xml
file_extension 'xml'
end
end
end

View File

@ -0,0 +1,140 @@
module CodeRay
  module Scanners
    # Scanner for YAML.
    #
    # Based on the YAML scanner from Syntax by Jamis Buck.
    class YAML < Scanner
      register_for :yaml
      file_extension 'yml'

      KINDS_NOT_LOC = :all

      protected

      # Tokenizes the input as YAML and feeds the resulting tokens to
      # +encoder+.
      #
      # The scanner is a small state machine:
      #
      # :initial:: at the start of the input and after every newline
      # :colon::   a key has just been emitted
      # :value::   after a ':' that follows a key, or after a '-'
      #
      # Control flow note: every "elsif ... and case ... end" clause tries a
      # group of patterns. A matching +when+ emits its tokens and leaves the
      # current iteration with +next+. When no +when+ matches, the +case+
      # evaluates to nil and control falls through to the following clause,
      # ending in the +else+ branch that reports a single character as :error.
      #
      # +options+ is not used by this scanner.
      # Returns +encoder+.
      def scan_tokens encoder, options
        state = :initial
        key_indent = string_indent = 0

        until eos?
          # A key indentation is only valid for the line it was found on.
          key_indent = nil if bol?

          if match = scan(/ +[\t ]*/)
            encoder.text_token match, :space

          elsif match = scan(/\n+/)
            encoder.text_token match, :space
            state = :initial if match.index(?\n)

          elsif match = scan(/#.*/)
            encoder.text_token match, :comment

          elsif bol? and case
            # Patterns that are only recognized at the beginning of a line.
            when match = scan(/---|\.\.\./)
              encoder.begin_group :head
              encoder.text_token match, :head
              encoder.end_group :head
              next
            when match = scan(/%.*/)
              encoder.text_token match, :doctype
              next
            end

          elsif state == :value and case
            # Scalar values. A quoted string directly followed by ':' is not
            # treated as a value here; it is left for the quoted-key pattern
            # further down.
            when !check(/(?:"[^"]*")(?=: |:$)/) && match = scan(/"/)
              encoder.begin_group :string
              encoder.text_token match, :delimiter
              # The content pattern also matches the empty string (e.g. for
              # ""), and scan then returns "" which is truthy. Skip the token
              # in that case so that no empty token reaches the encoder.
              if (match = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)) && !match.empty?
                encoder.text_token match, :content
              end
              encoder.text_token match, :delimiter if match = scan(/"/)
              encoder.end_group :string
              next
            when match = scan(/[|>][-+]?/)
              # Block scalar indicator; the following lines that start with
              # string_indent + 1 spaces are taken as the string content.
              encoder.begin_group :string
              encoder.text_token match, :delimiter
              string_indent = key_indent || column(pos - match.size) - 1
              encoder.text_token matched, :content if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
              encoder.end_group :string
              next
            when match = scan(/(?![!"*&]).+?(?=$|\s+#)/)
              # Unquoted value up to the end of the line or a trailing
              # comment, plus continuation lines found the same way as above.
              encoder.begin_group :string
              encoder.text_token match, :content
              string_indent = key_indent || column(pos - match.size) - 1
              encoder.text_token matched, :content if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
              encoder.end_group :string
              next
            end

          elsif case
            when match = scan(/[-:](?= |$)/)
              state = :value if state == :colon && (match == ':' || match == '-')
              state = :value if state == :initial && match == '-'
              encoder.text_token match, :operator
              next
            when match = scan(/[,{}\[\]]/)
              encoder.text_token match, :operator
              next
            when state == :initial && match = scan(/[\w.() ]*\S(?= *:(?: |$))/)
              # Plain key; its indentation is kept for multi-line values.
              encoder.text_token match, :key
              key_indent = column(pos - match.size) - 1
              state = :colon
              next
            when match = scan(/(?:"[^"\n]*"|'[^'\n]*')(?= *:(?: |$))/)
              # Quoted key: split into opening quote, content, closing quote.
              encoder.begin_group :key
              encoder.text_token match[0,1], :delimiter
              encoder.text_token match[1..-2], :content
              encoder.text_token match[-1,1], :delimiter
              encoder.end_group :key
              key_indent = column(pos - match.size) - 1
              state = :colon
              next
            when match = scan(/(![\w\/]+)(:([\w:]+))?/)
              # "!type", optionally followed by ":class".
              encoder.text_token self[1], :type
              if self[2]
                encoder.text_token ':', :operator
                encoder.text_token self[3], :class
              end
              next
            when match = scan(/&\S+/)
              encoder.text_token match, :variable
              next
            when match = scan(/\*\w+/)
              encoder.text_token match, :global_variable
              next
            when match = scan(/<</)
              encoder.text_token match, :class_variable
              next
            when match = scan(/\d\d:\d\d:\d\d/)
              encoder.text_token match, :octal
              next
            when match = scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
              encoder.text_token match, :octal
              next
            when match = scan(/:\w+/)
              encoder.text_token match, :symbol
              next
            when match = scan(/[^:\s]+(:(?! |$)[^:\s]*)* .*/)
              encoder.text_token match, :error
              next
            when match = scan(/[^:\s]+(:(?! |$)[^:\s]*)*/)
              encoder.text_token match, :error
              next
            end

          else
            # Nothing matched: consume one character as :error so the loop
            # always makes progress.
            raise if eos?
            encoder.text_token getch, :error

          end
        end

        encoder
      end
    end
  end
end