Update CodeRay version to 1.0 final (#4264).
git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@7619 e93f8b46-1217-0410-a6f0-8f06a7374b81
This commit is contained in:
parent
d1efb4f148
commit
8c2ae427fa
|
@ -0,0 +1,215 @@
|
|||
#!/usr/bin/env ruby
|
||||
require 'coderay'
|
||||
|
||||
$options, args = ARGV.partition { |arg| arg[/^-[hv]$|--\w+/] }
|
||||
subcommand = args.first if /^\w/ === args.first
|
||||
subcommand = nil if subcommand && File.exist?(subcommand)
|
||||
args.delete subcommand
|
||||
|
||||
# Tells whether at least one of the given switches was passed on the
# command line.
#
# Compares +options+ against the global $options list, which holds the
# switch-like arguments split off from ARGV at the top of this script.
def option? *options
  given = $options & options
  !given.empty?
end
|
||||
|
||||
# True when output goes to a terminal, or when the user forces terminal
# behaviour with the --tty switch.
def tty?
  return true if $stdout.tty?
  option?('--tty')
end
|
||||
|
||||
def version
|
||||
puts <<-USAGE
|
||||
CodeRay #{CodeRay::VERSION}
|
||||
USAGE
|
||||
end
|
||||
|
||||
def help
|
||||
puts <<-HELP
|
||||
This is CodeRay #{CodeRay::VERSION}, a syntax highlighting tool for selected languages.
|
||||
|
||||
usage:
|
||||
coderay [-language] [input] [-format] [output]
|
||||
|
||||
defaults:
|
||||
language detect from input file name or shebang; fall back to plain text
|
||||
input STDIN
|
||||
format detect from output file name or use terminal; fall back to HTML
|
||||
output STDOUT
|
||||
|
||||
common:
|
||||
coderay file.rb # highlight file to terminal
|
||||
coderay file.rb > file.html # highlight file to HTML page
|
||||
coderay file.rb -div > file.html # highlight file to HTML snippet
|
||||
|
||||
configure output:
|
||||
coderay file.py output.json # output tokens as JSON
|
||||
coderay file.py -loc # count lines of code in Python file
|
||||
|
||||
configure input:
|
||||
coderay -python file # specify the input language
|
||||
coderay -ruby # take input from STDIN
|
||||
|
||||
more:
|
||||
coderay stylesheet [style] # print CSS stylesheet
|
||||
HELP
|
||||
end
|
||||
|
||||
def commands
|
||||
puts <<-COMMANDS
|
||||
general:
|
||||
highlight code highlighting (default command, optional)
|
||||
stylesheet print the CSS stylesheet with the given name (aliases: style, css)
|
||||
|
||||
about:
|
||||
list [of] list all available plugins (or just the scanners|encoders|styles|filetypes)
|
||||
commands print this list
|
||||
help show some help
|
||||
version print CodeRay version
|
||||
COMMANDS
|
||||
end
|
||||
|
||||
# Prints one line per plugin of +plugin_host+, sorted alphabetically.
#
# Each line shows the plugin id and title. If the plugin has aliases other
# than :default, they follow in parentheses, written as command line
# switches ("-alias") and ordered shortest first. The plugin that carries
# the :default alias is marked as such.
def print_list_of plugin_host
  plugins = plugin_host.all_plugins.map do |plugin|
    info = " #{plugin.plugin_id}: #{plugin.title}"

    aliases = (plugin.aliases - [:default]).map { |key| "-#{key}" }.sort_by { |key| key.size }
    # Only emit the parenthesised part when there is something to put in it.
    # Plugins that respond to file_extension but have no aliases were
    # previously listed with a dangling " ()".
    info << " (#{aliases.join(', ')})" unless aliases.empty?

    info << ' <-- default' if plugin.aliases.include? :default

    info
  end
  puts plugins.sort
end
|
||||
|
||||
if option? '-v', '--version'
|
||||
version
|
||||
end
|
||||
|
||||
if option? '-h', '--help'
|
||||
help
|
||||
end
|
||||
|
||||
# Dispatch on the subcommand. Without a subcommand (or with "highlight"),
# the remaining arguments describe what to highlight and where to put it.
case subcommand
when 'highlight', nil
  if ARGV.empty?
    version
    help
  else
    # One character per argument: '-' for a switch (-language / -format),
    # 'f' for anything else (a file name). The resulting signature decides
    # how the arguments are assigned below.
    signature = args.map { |arg| arg[/^-/] ? '-' : 'f' }.join
    names = args.map { |arg| arg.sub(/^-/, '') }
    case signature
    when /^$/
      exit
    when /^ff?$/
      input_file, output_file = names
    when /^f-f?$/
      input_file, output_format, output_file = names
    when /^-ff?$/
      input_lang, input_file, output_file = names
    when /^-f-f?$/
      input_lang, input_file, output_format, output_file = names
    when /^--?f?$/
      input_lang, output_format, output_file = names
    else
      $stdout = $stderr
      help
      puts
      puts "Unknown parameter order: #{args.join ' '}, expected: [-language] [input] [-format] [output]"
      exit 1
    end

    # Guess the input language from the file name (or shebang) unless given.
    if input_file
      input_lang ||= CodeRay::FileType.fetch input_file, :text, true
    end

    # Guess the output format from the output file name; default to terminal.
    if output_file
      output_format ||= CodeRay::FileType[output_file]
    else
      output_format ||= :terminal
    end

    # A plain "html" format means a complete page on the command line.
    output_format = :page if output_format.to_s == 'html'

    input = input_file ? File.read(input_file) : $stdin.read

    file = nil
    begin
      file =
        if output_file
          File.open output_file, 'w'
        else
          $stdout.sync = true
          $stdout
        end
      CodeRay.encode(input, input_lang, output_format, :out => file)
      file.puts
    rescue CodeRay::PluginHost::PluginNotFound => boom
      $stdout = $stderr
      if boom.message[/CodeRay::(\w+)s could not load plugin :?(.*?): /]
        puts "I don't know the #$1 \"#$2\"."
      else
        puts boom.message
      end
      # puts "I don't know this plugin: #{boom.message[/Could not load plugin (.*?): /, 1]}."
    rescue CodeRay::Scanners::Scanner::ScanError, Errno::EPIPE
      # Sometimes raised when a pager closes the pipe early; ignore.
    ensure
      # +file+ is still nil if File.open itself failed; closing it then
      # would hide the real error behind a NoMethodError.
      file.close if output_file && file
    end
  end
when 'li', 'list'
  arg = args.first && args.first.downcase
  if [nil, 's', 'sc', 'scanner', 'scanners'].include? arg
    puts 'input languages (Scanners):'
    print_list_of CodeRay::Scanners
  end

  if [nil, 'e', 'en', 'enc', 'encoder', 'encoders'].include? arg
    puts 'output formats (Encoders):'
    print_list_of CodeRay::Encoders
  end

  if [nil, 'st', 'style', 'styles'].include? arg
    puts 'CSS themes for HTML output (Styles):'
    print_list_of CodeRay::Styles
  end

  if [nil, 'f', 'ft', 'file', 'filetype', 'filetypes'].include? arg
    puts 'recognized file types:'

    # Collect all known extensions and file names per file type.
    filetypes = Hash.new { |h, k| h[k] = [] }
    CodeRay::FileType::TypeFromExt.each do |ext, type|
      filetypes[type.to_s] << ".#{ext}"
    end
    CodeRay::FileType::TypeFromName.each do |name, type|
      filetypes[type.to_s] << name
    end

    filetypes.sort.each do |type, exts|
      puts " #{type}: #{exts.sort_by { |ext| ext.size }.join(', ')}"
    end
  end
when 'stylesheet', 'style', 'css'
  puts CodeRay::Encoders[:html]::CSS.new(args.first).stylesheet
when 'commands'
  commands
when 'help'
  help
else
  $stdout = $stderr
  help
  puts
  # A bare word that is not a command is reported as such; anything else
  # was meant to be a file name.
  if subcommand[/\A\w+\z/]
    puts "Unknown command: #{subcommand}"
  else
    puts "File not found: #{subcommand}"
  end
  exit 1
end
|
|
@ -0,0 +1,278 @@
|
|||
# encoding: utf-8
|
||||
# Encoding.default_internal = 'UTF-8'
|
||||
|
||||
# = CodeRay Library
|
||||
#
|
||||
# CodeRay is a Ruby library for syntax highlighting.
|
||||
#
|
||||
# I try to make CodeRay easy to use and intuitive, but at the same time fully
|
||||
# featured, complete, fast and efficient.
|
||||
#
|
||||
# See README.
|
||||
#
|
||||
# It consists mainly of
|
||||
# * the main engine: CodeRay (Scanners::Scanner, Tokens, Encoders::Encoder)
|
||||
# * the plugin system: PluginHost, Plugin
|
||||
# * the scanners in CodeRay::Scanners
|
||||
# * the encoders in CodeRay::Encoders
|
||||
# * the styles in CodeRay::Styles
|
||||
#
|
||||
# Here's a fancy graphic to light up this gray docu:
|
||||
#
|
||||
# http://cycnus.de/raindark/coderay/scheme.png
|
||||
#
|
||||
# == Documentation
|
||||
#
|
||||
# See CodeRay, Encoders, Scanners, Tokens.
|
||||
#
|
||||
# == Usage
|
||||
#
|
||||
# Remember you need RubyGems to use CodeRay, unless you have it in your load
|
||||
# path. Run Ruby with -rubygems option if required.
|
||||
#
|
||||
# === Highlight Ruby code in a string as html
|
||||
#
|
||||
# require 'coderay'
|
||||
# print CodeRay.scan('puts "Hello, world!"', :ruby).html
|
||||
#
|
||||
# # prints something like this:
|
||||
# puts <span class="s">"Hello, world!"</span>
|
||||
#
|
||||
#
|
||||
# === Highlight C code from a file in a html div
|
||||
#
|
||||
# require 'coderay'
|
||||
# print CodeRay.scan(File.read('ruby.h'), :c).div
|
||||
# print CodeRay.scan_file('ruby.h').html.div
|
||||
#
|
||||
# You can include this div in your page. The used CSS styles can be printed with
|
||||
#
|
||||
# % coderay_stylesheet
|
||||
#
|
||||
# === Highlight without typing too much
|
||||
#
|
||||
# If you are one of the hasty (or lazy, or extremely curious) people, just run this file:
|
||||
#
|
||||
# % ruby -rubygems /path/to/coderay/coderay.rb > example.html
|
||||
#
|
||||
# and look at the file it created in your browser.
|
||||
#
|
||||
# = CodeRay Module
|
||||
#
|
||||
# The CodeRay module provides convenience methods for the engine.
|
||||
#
|
||||
# * The +lang+ and +format+ arguments select Scanner and Encoder to use. These are
|
||||
# simply lower-case symbols, like <tt>:python</tt> or <tt>:html</tt>.
|
||||
# * All methods take an optional hash as last parameter, +options+, that is sent to
|
||||
# the Encoder / Scanner.
|
||||
# * Input and language are always sorted in this order: +code+, +lang+.
|
||||
# (This is in alphabetical order, if you need a mnemonic ;)
|
||||
#
|
||||
# You should be able to highlight everything you want just using these methods;
|
||||
# so there is no need to dive into CodeRay's deep class hierarchy.
|
||||
#
|
||||
# The examples in the demo directory demonstrate common cases using this interface.
|
||||
#
|
||||
# = Basic Access Ways
|
||||
#
|
||||
# Read this to get a general view what CodeRay provides.
|
||||
#
|
||||
# == Scanning
|
||||
#
|
||||
# Scanning means analysing an input string, splitting it up into Tokens.
|
||||
# Each Token knows about what type it is: string, comment, class name, etc.
|
||||
#
|
||||
# Each +lang+ (language) has its own Scanner; for example, <tt>:ruby</tt> code is
|
||||
# handled by CodeRay::Scanners::Ruby.
|
||||
#
|
||||
# CodeRay.scan:: Scan a string in a given language into Tokens.
|
||||
# This is the most common method to use.
|
||||
# CodeRay.scan_file:: Scan a file and guess the language using FileType.
|
||||
#
|
||||
# The Tokens object you get from these methods can encode itself; see Tokens.
|
||||
#
|
||||
# == Encoding
|
||||
#
|
||||
# Encoding means compiling Tokens into an output. This can be colored HTML or
|
||||
# LaTeX, a textual statistic or just the number of non-whitespace tokens.
|
||||
#
|
||||
# Each Encoder provides output in a specific +format+, so you select Encoders via
|
||||
# formats like <tt>:html</tt> or <tt>:statistic</tt>.
|
||||
#
|
||||
# CodeRay.encode:: Scan and encode a string in a given language.
|
||||
# CodeRay.encode_tokens:: Encode the given tokens.
|
||||
# CodeRay.encode_file:: Scan a file, guess the language using FileType and encode it.
|
||||
#
|
||||
# == All-in-One Encoding
|
||||
#
|
||||
# CodeRay.encode:: Highlight a string with a given input and output format.
|
||||
#
|
||||
# == Instantiating
|
||||
#
|
||||
# You can use an Encoder instance to highlight multiple inputs. This way, the setup
|
||||
# for this Encoder must only be done once.
|
||||
#
|
||||
# CodeRay.encoder:: Create an Encoder instance with format and options.
|
||||
# CodeRay.scanner:: Create a Scanner instance for lang, with '' as default code.
|
||||
#
|
||||
# To make use of CodeRay.scanner, use CodeRay::Scanner::code=.
|
||||
#
|
||||
# The scanning methods provide more flexibility; we recommend to use these.
|
||||
#
|
||||
# == Reusing Scanners and Encoders
|
||||
#
|
||||
# If you want to re-use scanners and encoders (because that is faster), see
|
||||
# CodeRay::Duo for the most convenient (and recommended) interface.
|
||||
module CodeRay
|
||||
|
||||
$CODERAY_DEBUG ||= false
|
||||
|
||||
require 'coderay/version'
|
||||
|
||||
# helpers
|
||||
autoload :FileType, 'coderay/helpers/file_type'
|
||||
|
||||
# Tokens
|
||||
autoload :Tokens, 'coderay/tokens'
|
||||
autoload :TokensProxy, 'coderay/tokens_proxy'
|
||||
autoload :TokenKinds, 'coderay/token_kinds'
|
||||
|
||||
# Plugin system
|
||||
autoload :PluginHost, 'coderay/helpers/plugin'
|
||||
autoload :Plugin, 'coderay/helpers/plugin'
|
||||
|
||||
# Plugins
|
||||
autoload :Scanners, 'coderay/scanner'
|
||||
autoload :Encoders, 'coderay/encoder'
|
||||
autoload :Styles, 'coderay/style'
|
||||
|
||||
# Convenience access and reusable Encoder/Scanner pair
|
||||
autoload :Duo, 'coderay/duo'
|
||||
|
||||
class << self
|
||||
|
||||
# Scans the given +code+ (a String) with the Scanner for +lang+.
|
||||
#
|
||||
# This is a simple way to use CodeRay. Example:
|
||||
# require 'coderay'
|
||||
# page = CodeRay.scan("puts 'Hello, world!'", :ruby).html
|
||||
#
|
||||
# See also demo/demo_simple.
|
||||
def scan code, lang, options = {}, &block
|
||||
# FIXME: return a proxy for direct-stream encoding
|
||||
TokensProxy.new code, lang, options, block
|
||||
end
|
||||
|
||||
# Scans +filename+ (a path to a code file) with the Scanner for +lang+.
|
||||
#
|
||||
# If +lang+ is :auto or omitted, the CodeRay::FileType module is used to
|
||||
# determine it. If it cannot find out what type it is, it uses
|
||||
# CodeRay::Scanners::Text.
|
||||
#
|
||||
# Calls CodeRay.scan.
|
||||
#
|
||||
# Example:
|
||||
# require 'coderay'
|
||||
# page = CodeRay.scan_file('some_c_code.c').html
|
||||
def scan_file filename, lang = :auto, options = {}, &block
|
||||
lang = FileType.fetch filename, :text, true if lang == :auto
|
||||
code = File.read filename
|
||||
scan code, lang, options, &block
|
||||
end
|
||||
|
||||
# Encode a string.
|
||||
#
|
||||
# This scans +code+ with the Scanner for +lang+ and then
|
||||
# encodes it with the Encoder for +format+.
|
||||
# +options+ will be passed to the Encoder.
|
||||
#
|
||||
# See CodeRay::Encoder.encode.
|
||||
def encode code, lang, format, options = {}
|
||||
encoder(format, options).encode code, lang, options
|
||||
end
|
||||
|
||||
# Encode pre-scanned Tokens.
|
||||
# Use this together with CodeRay.scan:
|
||||
#
|
||||
# require 'coderay'
|
||||
#
|
||||
# # Highlight a short Ruby code example in a HTML span
|
||||
# tokens = CodeRay.scan '1 + 2', :ruby
|
||||
# puts CodeRay.encode_tokens(tokens, :span)
|
||||
#
|
||||
def encode_tokens tokens, format, options = {}
|
||||
encoder(format, options).encode_tokens tokens, options
|
||||
end
|
||||
|
||||
# Encodes +filename+ (a path to a code file) with the Scanner for +lang+.
|
||||
#
|
||||
# See CodeRay.scan_file.
|
||||
# Notice that the second argument is the output +format+, not the input language.
|
||||
#
|
||||
# Example:
|
||||
# require 'coderay'
|
||||
# page = CodeRay.encode_file 'some_c_code.c', :html
|
||||
def encode_file filename, format, options = {}
|
||||
tokens = scan_file filename, :auto, get_scanner_options(options)
|
||||
encode_tokens tokens, format, options
|
||||
end
|
||||
|
||||
# Highlight a string into a HTML <div>.
|
||||
#
|
||||
# CSS styles use classes, so you have to include a stylesheet
|
||||
# in your output.
|
||||
#
|
||||
# See encode.
|
||||
def highlight code, lang, options = { :css => :class }, format = :div
|
||||
encode code, lang, format, options
|
||||
end
|
||||
|
||||
# Highlight a file into a HTML <div>.
|
||||
#
|
||||
# CSS styles use classes, so you have to include a stylesheet
|
||||
# in your output.
|
||||
#
|
||||
# See encode.
|
||||
def highlight_file filename, options = { :css => :class }, format = :div
|
||||
encode_file filename, format, options
|
||||
end
|
||||
|
||||
# Finds the Encoder class for +format+ and creates an instance, passing
|
||||
# +options+ to it.
|
||||
#
|
||||
# Example:
|
||||
# require 'coderay'
|
||||
#
|
||||
# stats = CodeRay.encoder(:statistic)
|
||||
# stats.encode("puts 17 + 4\n", :ruby)
|
||||
#
|
||||
# puts '%d out of %d tokens have the kind :integer.' % [
|
||||
# stats.type_stats[:integer].count,
|
||||
# stats.real_token_count
|
||||
# ]
|
||||
# #-> 2 out of 4 tokens have the kind :integer.
|
||||
def encoder format, options = {}
|
||||
Encoders[format].new options
|
||||
end
|
||||
|
||||
# Finds the Scanner class for +lang+ and creates an instance, passing
|
||||
# +options+ to it.
|
||||
#
|
||||
# See Scanner.new.
|
||||
def scanner lang, options = {}, &block
|
||||
Scanners[lang].new '', options, &block
|
||||
end
|
||||
|
||||
# Extract the options for the scanner from the +options+ hash.
|
||||
#
|
||||
# Returns an empty Hash if <tt>:scanner_options</tt> is not set.
|
||||
#
|
||||
# This is used if a method like CodeRay.encode has to provide options
|
||||
# for Encoder _and_ scanner.
|
||||
def get_scanner_options options
|
||||
options.fetch :scanner_options, {}
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
|
@ -0,0 +1,65 @@
|
|||
module CodeRay
module Encoders

  class HTML
    # Turns the token colors of a CodeRay style into a stylesheet and into a
    # lookup table from CSS classes to inline style declarations.
    class CSS  # :nodoc:

      # The stylesheet text assembled by #initialize.
      attr_reader :stylesheet

      # Fetches the style plugin registered under +style+ from CodeRay::Styles.
      def self.load_stylesheet style = nil
        CodeRay::Styles[style]
      end

      # Loads +style+, builds the stylesheet (main styles followed by the
      # token colors, each non-empty line prefixed with ".CodeRay ") and
      # parses the token colors into the class lookup table.
      def initialize style = :default
        @classes = Hash.new
        theme = CSS.load_stylesheet style
        scoped_colors = theme::TOKEN_COLORS.gsub(/^(?!$)/, '.CodeRay ')
        @stylesheet = [theme::CSS_MAIN_STYLES, scoped_colors].join("\n")
        parse theme::TOKEN_COLORS
      end

      # Looks up the inline style for +styles+, a list whose first element is
      # the class itself and whose remaining elements are its context.
      #
      # Returns '' if the class is unknown. Otherwise tries ever shorter
      # tails of the context and returns the first hit, or the result of the
      # last lookup (nil) if none of them is defined.
      def get_style styles
        rules = @classes[styles.first]
        return '' unless rules
        found = ''
        (1..styles.size).each do |offset|
          found = rules[styles[offset .. -1]]
          break if found
        end
        # warn 'Style not found: %p' % [styles] if style.empty?
        found
      end

    private

      CSS_CLASS_PATTERN = /
        (  # $1 = selectors
          (?:
            (?: \s* \. [-\w]+ )+
            \s* ,?
          )+
        )
        \s* \{ \s*
        ( [^\}]+ )?  # $2 = style
        \s* \} \s*
      |
        ( [^\n]+ )  # $3 = error
      /mx

      # Fills @classes from +stylesheet+: for every selector, the last class
      # is the key, and the classes before it form the context under which
      # the (whitespace-stripped) declarations are stored.
      #
      # Raises if a line is not recognized as a class rule.
      def parse stylesheet
        stylesheet.scan CSS_CLASS_PATTERN do |selectors, style, error|
          raise "CSS parse error: '#{error.inspect}' not recognized" if error
          selectors.split(',').each do |selector|
            context = selector.scan(/[-\w]+/)
            own_class = context.pop
            @classes[own_class] ||= Hash.new
            @classes[own_class][context] = style.to_s.strip.delete(' ').chomp(';')
          end
        end
      end

    end
  end

end
end
|
|
@ -0,0 +1,115 @@
|
|||
module CodeRay
module Encoders

  class HTML

    module Numbering  # :nodoc:

      # Adds line numbers to +output+ in place and returns +output+.
      #
      # +mode+ selects the layout:
      # * :inline prefixes every line with a line-number span,
      # * :table wraps the output in Output::TABLE next to a number column,
      # * nil or false leaves +output+ untouched.
      #
      # +options+ is merged over DEFAULT_OPTIONS. The keys read here are
      # :line_number_start, :line_number_anchors, :bold_every and
      # :highlight_lines.
      #
      # Raises ArgumentError for invalid option values or an unknown +mode+,
      # and NotImplementedError for the removed :list mode.
      def self.number! output, mode = :table, options = {}
        # Return the output like every other path does (this used to return
        # the Numbering module itself).
        return output unless mode

        options = DEFAULT_OPTIONS.merge options

        start = options[:line_number_start]
        unless start.is_a? Integer
          raise ArgumentError, "Invalid value %p for :line_number_start; Integer expected." % [start]
        end

        # Optional anchors: true means the prefix "line"; any other value is
        # reduced to its first run of word characters.
        anchor_prefix = options[:line_number_anchors]
        anchor_prefix = 'line' if anchor_prefix == true
        anchor_prefix = anchor_prefix.to_s[/\w+/] if anchor_prefix
        anchoring =
          if anchor_prefix
            proc do |line|
              line = line.to_s
              anchor = anchor_prefix + line
              "<a href=\"##{anchor}\" name=\"#{anchor}\">#{line}</a>"
            end
          else
            proc { |line| line.to_s }
          end

        # Decorate the (possibly anchored) number: highlighted lines take
        # precedence over bolding every n-th line.
        bold_every = options[:bold_every]
        highlight_lines = options[:highlight_lines]
        bolding =
          if bold_every == false && highlight_lines == nil
            anchoring
          elsif highlight_lines.is_a? Enumerable
            highlight_lines = highlight_lines.to_set
            proc do |line|
              if highlight_lines.include? line
                "<strong class=\"highlighted\">#{anchoring[line]}</strong>"  # highlighted line numbers in bold
              else
                anchoring[line]
              end
            end
          elsif bold_every.is_a? Integer
            raise ArgumentError, ":bold_every can't be 0." if bold_every == 0
            proc do |line|
              if line % bold_every == 0
                "<strong>#{anchoring[line]}</strong>"  # every bold_every-th number in bold
              else
                anchoring[line]
              end
            end
          else
            raise ArgumentError, 'Invalid value %p for :bold_every; false or Integer expected.' % [bold_every]
          end

        # Count lines: one per newline, plus one if text other than closing
        # span tags follows the last newline.
        line_count = output.count("\n")
        position_of_last_newline = output.rindex("\n")
        if position_of_last_newline
          after_last_newline = output[position_of_last_newline + 1 .. -1]
          ends_with_newline = after_last_newline[/\A(?:<\/span>)*\z/]
          line_count += 1 if not ends_with_newline
        end

        case mode
        when :inline
          max_width = (start + line_count).to_s.size
          line_number = start
          nesting = []
          output.gsub!(/^.*$\n?/) do |line|
            line.chomp!
            # Spans still open from earlier lines are re-opened after the
            # number and closed again at the end of this line.
            opening_tags = nesting.join
            line.scan(%r!<(/)?span[^>]*>?!) do |close,|
              if close
                nesting.pop
              else
                nesting << $&
              end
            end
            closing_tags = '</span>' * nesting.size

            line_number_text = bolding.call line_number
            indent = ' ' * (max_width - line_number.to_s.size)  # TODO: Optimize (10^x)
            line_number += 1
            "<span class=\"line-numbers\">#{indent}#{line_number_text}</span>#{opening_tags}#{line}#{closing_tags}\n"
          end

        when :table
          line_numbers = (start ... start + line_count).map(&bolding).join("\n")
          line_numbers << "\n"
          line_numbers_table_template = Output::TABLE.apply('LINE_NUMBERS', line_numbers)

          output.gsub!(/<\/div>\n/, '</div>')
          output.wrap_in! line_numbers_table_template
          output.wrapped_in = :div

        when :list
          raise NotImplementedError, 'The :list option is no longer available. Use :table.'

        else
          raise ArgumentError, 'Unknown value %p for mode: expected one of %p' %
            [mode, [:table, :inline]]
        end

        output
      end

    end

  end

end
end
|
|
@ -0,0 +1,158 @@
|
|||
module CodeRay
module Encoders

  class HTML

    # This module is included in the output String of the HTML Encoder.
    #
    # It provides methods like wrap, div, page etc.
    #
    # Remember to use #clone instead of #dup to keep the modules the object was
    # extended with.
    #
    # TODO: Rewrite this without monkey patching.
    module Output

      attr_accessor :css

      class << self

        # Warns (it does not raise) if an object that doesn't respond to
        # to_str is extended by Output, to prevent users from misuse.
        # Use Module#remove_method to disable.
        def extended o  # :nodoc:
          warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str
        end

        # Returns the stylesheet text of +css+. With +in_tag+, the text is
        # wrapped in a <style> element.
        def make_stylesheet css, in_tag = false  # :nodoc:
          sheet = css.stylesheet
          # Built with interpolation; the former single-quoted heredoc
          # emitted the literal text "#{sheet}" instead of the stylesheet.
          sheet = "<style type=\"text/css\">\n#{sheet}\n</style>\n" if in_tag
          sheet
        end

        # Returns the PAGE template with the stylesheet of +css+ filled in.
        def page_template_for_css css  # :nodoc:
          sheet = make_stylesheet css
          PAGE.apply 'CSS', sheet
        end

      end

      # True if the output is currently wrapped in +element+ (nil = unwrapped).
      def wrapped_in? element
        wrapped_in == element
      end

      # The element the output is wrapped in, or nil.
      def wrapped_in
        @wrapped_in ||= nil
      end
      attr_writer :wrapped_in

      # Surrounds the output, in place, with the text around the
      # <%CONTENT%> placeholder of +template+. Returns self.
      def wrap_in! template
        Template.wrap! self, template, 'CONTENT'
        self
      end

      # Inserts +title+ into the first empty <title> element. Returns self.
      def apply_title! title
        self.sub!(/(<title>)(<\/title>)/) { $1 + title + $2 }
        self
      end

      # Wraps the output, in place, in +element+ (:div, :span or :page) and
      # records the new wrapping. Does nothing if +element+ is nil or false,
      # or if the output is already wrapped in it. For :page, a Hash with a
      # :title entry may be passed as first extra argument.
      #
      # Raises if the current wrapping cannot be wrapped in +element+, or if
      # +element+ is unknown. Returns self.
      def wrap! element, *args
        return self if !element || element == wrapped_in
        case element
        when :div
          raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
          wrap_in! DIV
        when :span
          raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
          wrap_in! SPAN
        when :page
          # A page needs a div inside; add one if the output is still bare.
          wrap! :div if wrapped_in? nil
          raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div
          wrap_in! Output.page_template_for_css(@css)
          if args.first.is_a?(Hash) && title = args.first[:title]
            apply_title! title
          end
        else
          raise "Unknown value %p for :wrap" % element
        end
        @wrapped_in = element
        self
      end

      # The stylesheet of the attached css object; see Output.make_stylesheet.
      def stylesheet in_tag = false
        Output.make_stylesheet @css, in_tag
      end

#-- don't include the templates in docu

      # A String with <%NAME%> placeholders.
      class Template < String  # :nodoc:

        # Surrounds +str+, in place, with the parts of +template+ before and
        # after the first <%target%> placeholder. Raises if it is missing.
        def self.wrap! str, template, target
          before, marker, after = template.partition("<%#{target}%>")
          raise "Template target <%%%s%%> not found" % target if marker.empty?
          str[0, 0] = before
          str << after
        end

        # Returns a new Template in which the first <%target%> placeholder is
        # replaced by +replacement+. Raises if the placeholder is missing.
        def apply target, replacement
          before, marker, after = partition("<%#{target}%>")
          raise "Template target <%%%s%%> not found" % target if marker.empty?
          Template.new(before + replacement + after)
        end

      end

      SPAN = Template.new '<span class="CodeRay"><%CONTENT%></span>'

      DIV = Template.new <<-DIV
<div class="CodeRay">
<div class="code"><pre><%CONTENT%></pre></div>
</div>
      DIV

      TABLE = Template.new <<-TABLE
<table class="CodeRay"><tr>
<td class="line-numbers" title="double click to toggle" ondblclick="with (this.firstChild.style) { display = (display == '') ? 'none' : '' }"><pre><%LINE_NUMBERS%></pre></td>
<td class="code"><pre><%CONTENT%></pre></td>
</tr></table>
      TABLE

      PAGE = Template.new <<-PAGE
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title></title>
<style type="text/css">
.CodeRay .line-numbers a {
text-decoration: inherit;
color: inherit;
}
<%CSS%>
</style>
</head>
<body style="background-color: white;">

<%CONTENT%>
</body>
</html>
      PAGE

    end

  end

end
end
|
|
@ -0,0 +1,24 @@
|
|||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
map \
|
||||
:'c++' => :cpp,
|
||||
:cplusplus => :cpp,
|
||||
:ecmascript => :java_script,
|
||||
:ecma_script => :java_script,
|
||||
:rhtml => :erb,
|
||||
:eruby => :erb,
|
||||
:irb => :ruby,
|
||||
:javascript => :java_script,
|
||||
:js => :java_script,
|
||||
:pascal => :delphi,
|
||||
:patch => :diff,
|
||||
:plain => :text,
|
||||
:plaintext => :text,
|
||||
:xhtml => :html,
|
||||
:yml => :yaml
|
||||
|
||||
default :text
|
||||
|
||||
end
|
||||
end
|
|
@ -0,0 +1,189 @@
|
|||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
# Scanner for C.
|
||||
class C < Scanner
|
||||
|
||||
register_for :c
|
||||
file_extension 'c'
|
||||
|
||||
KEYWORDS = [
|
||||
'asm', 'break', 'case', 'continue', 'default', 'do',
|
||||
'else', 'enum', 'for', 'goto', 'if', 'return',
|
||||
'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
|
||||
'restrict', # added in C99
|
||||
] # :nodoc:
|
||||
|
||||
PREDEFINED_TYPES = [
|
||||
'int', 'long', 'short', 'char',
|
||||
'signed', 'unsigned', 'float', 'double',
|
||||
'bool', 'complex', # added in C99
|
||||
] # :nodoc:
|
||||
|
||||
PREDEFINED_CONSTANTS = [
|
||||
'EOF', 'NULL',
|
||||
'true', 'false', # added in C99
|
||||
] # :nodoc:
|
||||
DIRECTIVES = [
|
||||
'auto', 'extern', 'register', 'static', 'void',
|
||||
'const', 'volatile', # added in C89
|
||||
'inline', # added in C99
|
||||
] # :nodoc:
|
||||
|
||||
IDENT_KIND = WordList.new(:ident).
|
||||
add(KEYWORDS, :keyword).
|
||||
add(PREDEFINED_TYPES, :predefined_type).
|
||||
add(DIRECTIVES, :directive).
|
||||
add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc:
|
||||
|
||||
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
|
||||
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
|
||||
|
||||
protected
|
||||
|
||||
def scan_tokens encoder, options
|
||||
|
||||
state = :initial
|
||||
label_expected = true
|
||||
case_expected = false
|
||||
label_expected_before_preproc_line = nil
|
||||
in_preproc_line = false
|
||||
|
||||
until eos?
|
||||
|
||||
case state
|
||||
|
||||
when :initial
|
||||
|
||||
if match = scan(/ \s+ | \\\n /x)
|
||||
if in_preproc_line && match != "\\\n" && match.index(?\n)
|
||||
in_preproc_line = false
|
||||
label_expected = label_expected_before_preproc_line
|
||||
end
|
||||
encoder.text_token match, :space
|
||||
|
||||
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
||||
encoder.text_token match, :comment
|
||||
|
||||
elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
|
||||
label_expected = match =~ /[;\{\}]/
|
||||
if case_expected
|
||||
label_expected = true if match == ':'
|
||||
case_expected = false
|
||||
end
|
||||
encoder.text_token match, :operator
|
||||
|
||||
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
|
||||
kind = IDENT_KIND[match]
|
||||
if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
|
||||
kind = :label
|
||||
match << matched
|
||||
else
|
||||
label_expected = false
|
||||
if kind == :keyword
|
||||
case match
|
||||
when 'case', 'default'
|
||||
case_expected = true
|
||||
end
|
||||
end
|
||||
end
|
||||
encoder.text_token match, kind
|
||||
|
||||
elsif match = scan(/L?"/)
|
||||
encoder.begin_group :string
|
||||
if match[0] == ?L
|
||||
encoder.text_token 'L', :modifier
|
||||
match = '"'
|
||||
end
|
||||
encoder.text_token match, :delimiter
|
||||
state = :string
|
||||
|
||||
elsif match = scan(/ \# \s* if \s* 0 /x)
|
||||
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
|
||||
encoder.text_token match, :comment
|
||||
|
||||
elsif match = scan(/#[ \t]*(\w*)/)
|
||||
encoder.text_token match, :preprocessor
|
||||
in_preproc_line = true
|
||||
label_expected_before_preproc_line = label_expected
|
||||
state = :include_expected if self[1] == 'include'
|
||||
|
||||
elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
|
||||
label_expected = false
|
||||
encoder.text_token match, :char
|
||||
|
||||
elsif match = scan(/\$/)
|
||||
encoder.text_token match, :ident
|
||||
|
||||
elsif match = scan(/0[xX][0-9A-Fa-f]+/)
|
||||
label_expected = false
|
||||
encoder.text_token match, :hex
|
||||
|
||||
elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/)
|
||||
label_expected = false
|
||||
encoder.text_token match, :octal
|
||||
|
||||
elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/)
|
||||
label_expected = false
|
||||
encoder.text_token match, :integer
|
||||
|
||||
elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
|
||||
label_expected = false
|
||||
encoder.text_token match, :float
|
||||
|
||||
else
|
||||
encoder.text_token getch, :error
|
||||
|
||||
end
|
||||
|
||||
when :string
|
||||
if match = scan(/[^\\\n"]+/)
|
||||
encoder.text_token match, :content
|
||||
elsif match = scan(/"/)
|
||||
encoder.text_token match, :delimiter
|
||||
encoder.end_group :string
|
||||
state = :initial
|
||||
label_expected = false
|
||||
elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
|
||||
encoder.text_token match, :char
|
||||
elsif match = scan(/ \\ | $ /x)
|
||||
encoder.end_group :string
|
||||
encoder.text_token match, :error
|
||||
state = :initial
|
||||
label_expected = false
|
||||
else
|
||||
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
|
||||
end
|
||||
|
||||
when :include_expected
|
||||
if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
|
||||
encoder.text_token match, :include
|
||||
state = :initial
|
||||
|
||||
elsif match = scan(/\s+/)
|
||||
encoder.text_token match, :space
|
||||
state = :initial if match.index ?\n
|
||||
|
||||
else
|
||||
state = :initial
|
||||
|
||||
end
|
||||
|
||||
else
|
||||
raise_inspect 'Unknown state', encoder
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
if state == :string
|
||||
encoder.end_group :string
|
||||
end
|
||||
|
||||
encoder
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -0,0 +1,217 @@
|
|||
# encoding: utf-8
|
||||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
# Clojure scanner by Licenser.
|
||||
class Clojure < Scanner
|
||||
|
||||
register_for :clojure
|
||||
file_extension 'clj'
|
||||
|
||||
SPECIAL_FORMS = %w[
|
||||
def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
|
||||
new
|
||||
] # :nodoc:
|
||||
|
||||
CORE_FORMS = %w[
|
||||
+ - -> ->> .. / * <= < = == >= > accessor aclone add-classpath add-watch
|
||||
agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
|
||||
alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
|
||||
aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
|
||||
assert assoc assoc! assoc-in associative? atom await await-for bases bean
|
||||
bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
|
||||
bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
|
||||
booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
|
||||
char-array char-escape-string char-name-string char? chars class class?
|
||||
clear-agent-errors clojure-version coll? comment commute comp comparator
|
||||
compare compare-and-set! compile complement concat cond condp conj conj!
|
||||
cons constantly construct-proxy contains? count counted? create-ns
|
||||
create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
|
||||
defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
|
||||
deliver denominator deref derive descendants disj disj! dissoc dissoc!
|
||||
distinct distinct? doall doc dorun doseq dosync dotimes doto double
|
||||
double-array doubles drop drop-last drop-while empty empty? ensure
|
||||
enumeration-seq error-handler error-mode eval even? every? extend
|
||||
extend-protocol extend-type extenders extends? false? ffirst file-seq
|
||||
filter find find-doc find-ns find-var first float float-array float?
|
||||
floats flush fn fn? fnext for force format future future-call future-cancel
|
||||
future-cancelled? future-done? future? gen-class gen-interface gensym get
|
||||
get-in get-method get-proxy-class get-thread-bindings get-validator hash
|
||||
hash-map hash-set identical? identity if-let if-not ifn? import in-ns
|
||||
inc init-proxy instance? int int-array integer? interleave intern
|
||||
interpose into into-array ints io! isa? iterate iterator-seq juxt key
|
||||
keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
|
||||
list? load load-file load-reader load-string loaded-libs locking long
|
||||
long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
|
||||
map map? mapcat max max-key memfn memoize merge merge-with meta methods
|
||||
min min-key mod name namespace neg? newline next nfirst nil? nnext not
|
||||
not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
|
||||
ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
|
||||
nthnext num number? numerator object-array odd? or parents partial
|
||||
partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
|
||||
pos? pr pr-str prefer-method prefers print print-namespace-doc
|
||||
print-str printf println println-str prn prn-str promise proxy
|
||||
proxy-mappings proxy-super push-thread-bindings pvalues quot rand
|
||||
rand-int range ratio? rationalize re-find re-groups re-matcher
|
||||
re-matches re-pattern re-seq read read-line read-string reduce ref
|
||||
ref-history-count ref-max-history ref-min-history ref-set refer
|
||||
refer-clojure reify release-pending-sends rem remove remove-all-methods
|
||||
remove-method remove-ns remove-watch repeat repeatedly replace replicate
|
||||
require reset! reset-meta! resolve rest restart-agent resultset-seq
|
||||
reverse reversible? rseq rsubseq satisfies? second select-keys send
|
||||
send-off seq seq? seque sequence sequential? set set-error-handler!
|
||||
set-error-mode! set-validator! set? short short-array shorts
|
||||
shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
|
||||
sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
|
||||
split-at split-with str string? struct struct-map subs subseq subvec
|
||||
supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
|
||||
take-nth take-while test the-ns thread-bound? time to-array to-array-2d
|
||||
trampoline transient tree-seq true? type unchecked-add unchecked-dec
|
||||
unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
|
||||
unchecked-remainder unchecked-subtract underive update-in update-proxy
|
||||
use val vals var-get var-set var? vary-meta vec vector vector-of vector?
|
||||
when when-first when-let when-not while with-bindings with-bindings*
|
||||
with-in-str with-local-vars with-meta with-open with-out-str
|
||||
with-precision xml-seq zero? zipmap
|
||||
] # :nodoc:
|
||||
|
||||
PREDEFINED_CONSTANTS = %w[
|
||||
true false nil *1 *2 *3 *agent* *clojure-version* *command-line-args*
|
||||
*compile-files* *compile-path* *e *err* *file* *flush-on-newline*
|
||||
*in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
|
||||
*print-readably* *read-eval* *warn-on-reflection*
|
||||
] # :nodoc:
|
||||
|
||||
IDENT_KIND = WordList.new(:ident).
|
||||
add(SPECIAL_FORMS, :keyword).
|
||||
add(CORE_FORMS, :keyword).
|
||||
add(PREDEFINED_CONSTANTS, :predefined_constant)
|
||||
|
||||
KEYWORD_NEXT_TOKEN_KIND = WordList.new(nil).
|
||||
add(%w[ def defn defn- definline defmacro defmulti defmethod defstruct defonce declare ], :function).
|
||||
add(%w[ ns ], :namespace).
|
||||
add(%w[ defprotocol defrecord ], :class)
|
||||
|
||||
BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=]=?[a-zA-Z0-9$&*+!\/_?<>\-\#]*/
|
||||
IDENTIFIER = /(?!-\d)(?:(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
|
||||
SYMBOL = /::?#{IDENTIFIER}/o
|
||||
DIGIT = /\d/
|
||||
DIGIT10 = DIGIT
|
||||
DIGIT16 = /[0-9a-f]/i
|
||||
DIGIT8 = /[0-7]/
|
||||
DIGIT2 = /[01]/
|
||||
RADIX16 = /\#x/i
|
||||
RADIX8 = /\#o/i
|
||||
RADIX2 = /\#b/i
|
||||
RADIX10 = /\#d/i
|
||||
EXACTNESS = /#i|#e/i
|
||||
SIGN = /[\+-]?/
|
||||
EXP_MARK = /[esfdl]/i
|
||||
EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
|
||||
SUFFIX = /#{EXP}?/
|
||||
PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
|
||||
PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
|
||||
PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
|
||||
PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
|
||||
UINT10 = /#{DIGIT10}+#*/
|
||||
UINT16 = /#{DIGIT16}+#*/
|
||||
UINT8 = /#{DIGIT8}+#*/
|
||||
UINT2 = /#{DIGIT2}+#*/
|
||||
DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
|
||||
UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
|
||||
UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
|
||||
UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
|
||||
UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
|
||||
REAL10 = /#{SIGN}#{UREAL10}/
|
||||
REAL16 = /#{SIGN}#{UREAL16}/
|
||||
REAL8 = /#{SIGN}#{UREAL8}/
|
||||
REAL2 = /#{SIGN}#{UREAL2}/
|
||||
IMAG10 = /i|#{UREAL10}i/
|
||||
IMAG16 = /i|#{UREAL16}i/
|
||||
IMAG8 = /i|#{UREAL8}i/
|
||||
IMAG2 = /i|#{UREAL2}i/
|
||||
COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
|
||||
COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
|
||||
COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
|
||||
COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
|
||||
NUM10 = /#{PREFIX10}?#{COMPLEX10}/
|
||||
NUM16 = /#{PREFIX16}#{COMPLEX16}/
|
||||
NUM8 = /#{PREFIX8}#{COMPLEX8}/
|
||||
NUM2 = /#{PREFIX2}#{COMPLEX2}/
|
||||
NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
|
||||
|
||||
protected
|
||||
|
||||
# Tokenizes Clojure source and sends every token to +encoder+.
#
# A small state machine is used: +:initial+ handles ordinary code, while
# +:string+ and +:regexp+ handle the inside of a double-quoted literal
# (a regexp is a string literal prefixed with <tt>#</tt>).
#
# +options+ is accepted but not read by this method.
# Returns +encoder+.
def scan_tokens encoder, options

  state = :initial
  kind = nil

  until eos?

    case state
    when :initial
      if match = scan(/ \s+ | \\\n | , /x)
        # Commas are reported as :space, exactly like whitespace.
        encoder.text_token match, :space
      elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|[@\^]/)
        encoder.text_token match, :operator
      elsif match = scan(/;.*/)
        encoder.text_token match, :comment  # TODO: recognize (comment ...) too
      elsif match = scan(/\#?\\(?:newline|space|.?)/)
        encoder.text_token match, :char
      elsif match = scan(/\#[ft]/)
        encoder.text_token match, :predefined_constant
      elsif match = scan(/#{IDENTIFIER}/o)
        kind = IDENT_KIND[match]
        encoder.text_token match, kind
        # Some keywords (def, ns, defprotocol, ...) give the identifier that
        # follows them a special kind; KEYWORD_NEXT_TOKEN_KIND answers nil
        # for all other keywords.
        if rest? && kind == :keyword && (kind = KEYWORD_NEXT_TOKEN_KIND[match])
          if match = scan(/\s+/o)
            encoder.text_token match, :space
          end
          if match = scan(/#{IDENTIFIER}/o)
            encoder.text_token match, kind
          end
        end
      elsif match = scan(/#{SYMBOL}/o)
        encoder.text_token match, :symbol
      elsif match = scan(/\./)
        encoder.text_token match, :operator
      elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
        encoder.text_token match, :type
      elsif match = scan(/ (\#)? " /x)
        # The first capture is set only when the quote was preceded by '#'.
        if self[1]
          state = :regexp
        else
          state = :string
        end
        encoder.begin_group state
        encoder.text_token match, :delimiter
      elsif (match = scan(/#{NUM}/o)) && !matched.empty?
        # A dot, an exponent marker or a ratio slash makes the number a float.
        number_kind = match[/[.e\/]/i] ? :float : :integer
        encoder.text_token match, number_kind
      else
        encoder.text_token getch, :error
      end

    when :string, :regexp
      if match = scan(/[^"\\]+|\\.?/)
        encoder.text_token match, :content
      elsif match = scan(/"/)
        encoder.text_token match, :delimiter
        encoder.end_group state
        state = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1),
          encoder, state
      end

    else
      raise 'else case reached'

    end

  end

  # Input ended inside a literal: close the group that is still open.
  encoder.end_group state if state == :string || state == :regexp

  encoder

end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,215 @@
|
|||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
# Scanner for C++.
|
||||
#
|
||||
# Aliases: +cplusplus+, c++
|
||||
class CPlusPlus < Scanner
|
||||
|
||||
register_for :cpp
|
||||
file_extension 'cpp'
|
||||
title 'C++'
|
||||
|
||||
#-- http://www.cppreference.com/wiki/keywords/start
|
||||
KEYWORDS = [
|
||||
'and', 'and_eq', 'asm', 'bitand', 'bitor', 'break',
|
||||
'case', 'catch', 'class', 'compl', 'const_cast',
|
||||
'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else',
|
||||
'enum', 'export', 'for', 'goto', 'if', 'namespace', 'new',
|
||||
'not', 'not_eq', 'or', 'or_eq', 'reinterpret_cast', 'return',
|
||||
'sizeof', 'static_cast', 'struct', 'switch', 'template',
|
||||
'throw', 'try', 'typedef', 'typeid', 'typename', 'union',
|
||||
'while', 'xor', 'xor_eq',
|
||||
] # :nodoc:
|
||||
|
||||
PREDEFINED_TYPES = [
|
||||
'bool', 'char', 'double', 'float', 'int', 'long',
|
||||
'short', 'signed', 'unsigned', 'wchar_t', 'string',
|
||||
] # :nodoc:
|
||||
PREDEFINED_CONSTANTS = [
|
||||
'false', 'true',
|
||||
'EOF', 'NULL',
|
||||
] # :nodoc:
|
||||
PREDEFINED_VARIABLES = [
|
||||
'this',
|
||||
] # :nodoc:
|
||||
DIRECTIVES = [
|
||||
'auto', 'const', 'explicit', 'extern', 'friend', 'inline', 'mutable', 'operator',
|
||||
'private', 'protected', 'public', 'register', 'static', 'using', 'virtual', 'void',
|
||||
'volatile',
|
||||
] # :nodoc:
|
||||
|
||||
IDENT_KIND = WordList.new(:ident).
|
||||
add(KEYWORDS, :keyword).
|
||||
add(PREDEFINED_TYPES, :predefined_type).
|
||||
add(PREDEFINED_VARIABLES, :local_variable).
|
||||
add(DIRECTIVES, :directive).
|
||||
add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc:
|
||||
|
||||
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
|
||||
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
|
||||
|
||||
protected
|
||||
|
||||
# Tokenizes C++ source and feeds the tokens to +encoder+.
#
# The scanner is a state machine with four states:
# * +:initial+ - ordinary code
# * +:string+ - inside a double-quoted string literal
# * +:include_expected+ - directly after an include preprocessor directive
# * +:class_name_expected+ - directly after the +class+ keyword
#
# +options+ is accepted but not read by this method.
# Returns +encoder+.
def scan_tokens encoder, options

  state = :initial
  # True while an identifier followed by a single ':' counts as a label.
  label_expected = true
  # True after 'case' / 'default', until the next operator token.
  case_expected = false
  # Value of label_expected saved when a preprocessor line starts; it is
  # restored when that line ends.
  label_expected_before_preproc_line = nil
  in_preproc_line = false

  until eos?

    case state

    when :initial

      if match = scan(/ \s+ | \\\n /x)
        # An unescaped newline ends the current preprocessor line.
        if in_preproc_line && match != "\\\n" && match.index(?\n)
          in_preproc_line = false
          label_expected = label_expected_before_preproc_line
        end
        encoder.text_token match, :space

      elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        encoder.text_token match, :comment

      elsif match = scan(/ \# \s* if \s* 0 /x)
        # Everything up to the matching elif/else/endif line (or the end of
        # the input) is reported as one comment.
        match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
        encoder.text_token match, :comment

      elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
        label_expected = match =~ /[;\{\}]/
        if case_expected
          label_expected = true if match == ':'
          case_expected = false
        end
        encoder.text_token match, :operator

      elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
        kind = IDENT_KIND[match]
        if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
          # The trailing colon becomes part of the label token.
          kind = :label
          match << matched
        else
          label_expected = false
          if kind == :keyword
            case match
            when 'class'
              state = :class_name_expected
            when 'case', 'default'
              case_expected = true
            end
          end
        end
        encoder.text_token match, kind

      elsif match = scan(/\$/)
        encoder.text_token match, :ident

      elsif match = scan(/L?"/)
        encoder.begin_group :string
        if match[0] == ?L
          # text_token takes (text, kind). The previous three-argument call
          # was an arity error and would have put the quote into the
          # modifier token.
          # NOTE(review): the identifier branch above already matches a
          # leading "L", so this path looks unreachable with the current
          # branch order - confirm before relying on it.
          encoder.text_token 'L', :modifier
          match = '"'
        end
        state = :string
        encoder.text_token match, :delimiter

      elsif match = scan(/#[ \t]*(\w*)/)
        encoder.text_token match, :preprocessor
        in_preproc_line = true
        label_expected_before_preproc_line = label_expected
        state = :include_expected if self[1] == 'include'

      elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
        label_expected = false
        encoder.text_token match, :char

      elsif match = scan(/0[xX][0-9A-Fa-f]+/)
        label_expected = false
        encoder.text_token match, :hex

      elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/)
        label_expected = false
        encoder.text_token match, :octal

      elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/)
        label_expected = false
        encoder.text_token match, :integer

      elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
        label_expected = false
        encoder.text_token match, :float

      else
        encoder.text_token getch, :error

      end

    when :string
      if match = scan(/[^\\"]+/)
        encoder.text_token match, :content
      elsif match = scan(/"/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        state = :initial
        label_expected = false
      elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ | $ /x)
        # Unknown escape or end of line: close the string and flag an error.
        encoder.end_group :string
        encoder.text_token match, :error
        state = :initial
        label_expected = false
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
      end

    when :include_expected
      if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
        encoder.text_token match, :include
        state = :initial

      elsif match = scan(/\s+/)
        encoder.text_token match, :space
        # A line break ends the directive even without a file name.
        state = :initial if match.index ?\n

      else
        state = :initial

      end

    when :class_name_expected
      if match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
        encoder.text_token match, :class
        state = :initial

      elsif match = scan(/\s+/)
        encoder.text_token match, :space

      else
        encoder.text_token getch, :error
        state = :initial

      end

    else
      raise_inspect 'Unknown state', encoder

    end

  end

  # Input ended inside a string literal: close the open group.
  if state == :string
    encoder.end_group :string
  end

  encoder
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -0,0 +1,192 @@
|
|||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
class CSS < Scanner
|
||||
|
||||
register_for :css
|
||||
|
||||
KINDS_NOT_LOC = [
|
||||
:comment,
|
||||
:class, :pseudo_class, :type,
|
||||
:constant, :directive,
|
||||
:key, :value, :operator, :color, :float, :string,
|
||||
:error, :important,
|
||||
] # :nodoc:
|
||||
|
||||
module RE # :nodoc:
|
||||
Hex = /[0-9a-fA-F]/
|
||||
Unicode = /\\#{Hex}{1,6}(?:\r\n|\s)?/ # differs from standard because it allows uppercase hex too
|
||||
Escape = /#{Unicode}|\\[^\r\n\f0-9a-fA-F]/
|
||||
NMChar = /[-_a-zA-Z0-9]|#{Escape}/
|
||||
NMStart = /[_a-zA-Z]|#{Escape}/
|
||||
NL = /\r\n|\r|\n|\f/
|
||||
String1 = /"(?:[^\n\r\f\\"]|\\#{NL}|#{Escape})*"?/ # TODO: buggy regexp
|
||||
String2 = /'(?:[^\n\r\f\\']|\\#{NL}|#{Escape})*'?/ # TODO: buggy regexp
|
||||
String = /#{String1}|#{String2}/
|
||||
|
||||
HexColor = /#(?:#{Hex}{6}|#{Hex}{3})/
|
||||
Color = /#{HexColor}/
|
||||
|
||||
Num = /-?(?:[0-9]+|[0-9]*\.[0-9]+)/
|
||||
Name = /#{NMChar}+/
|
||||
Ident = /-?#{NMStart}#{NMChar}*/
|
||||
AtKeyword = /@#{Ident}/
|
||||
Percentage = /#{Num}%/
|
||||
|
||||
reldimensions = %w[em ex px]
|
||||
absdimensions = %w[in cm mm pt pc]
|
||||
Unit = Regexp.union(*(reldimensions + absdimensions))
|
||||
|
||||
Dimension = /#{Num}#{Unit}/
|
||||
|
||||
Comment = %r! /\* (?: .*? \*/ | .* ) !mx
|
||||
Function = /(?:url|alpha|attr|counters?)\((?:[^)\n\r\f]|\\\))*\)?/
|
||||
|
||||
Id = /##{Name}/
|
||||
Class = /\.#{Name}/
|
||||
PseudoClass = /:#{Name}/
|
||||
AttributeSelector = /\[[^\]]*\]?/
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
# Tokenizes CSS source and feeds the tokens to +encoder+.
#
# +states+ is a stack whose top decides how identifiers are classified:
# +:initial+ / +:media+ (selectors), +:block+ (declarations inside braces),
# +:media_before_name+ and +:media_after_name+ (while reading an @media
# rule header). +value_expected+ is true between a ':' and the next ';'
# or brace; inside a block it decides whether an identifier is reported
# as a +:value+ or a +:key+.
#
# +options+ is accepted but not read by this method.
# Returns +encoder+.
def scan_tokens encoder, options

  value_expected = nil
  states = [:initial]

  until eos?

    if match = scan(/\s+/)
      encoder.text_token match, :space

    # State-specific rules. Every rule that matches ends with +next+; when
    # nothing matches, the case expression is nil and the generic rules
    # below get their turn.
    elsif case states.last
      when :initial, :media
        if match = scan(/(?>#{RE::Ident})(?!\()|\*/ox)
          encoder.text_token match, :type
          next
        elsif match = scan(RE::Class)
          encoder.text_token match, :class
          next
        elsif match = scan(RE::Id)
          encoder.text_token match, :constant
          next
        elsif match = scan(RE::PseudoClass)
          encoder.text_token match, :pseudo_class
          next
        elsif match = scan(RE::AttributeSelector)
          # TODO: Improve highlighting inside of attribute selectors.
          # The closing bracket is optional in the pattern and the inner
          # part cannot contain ']', so the selector is terminated exactly
          # when the match ends with ']'. An unterminated selector keeps
          # all of its text (the last character used to be dropped).
          closed = match.size > 1 && match[-1, 1] == ']'
          inner = closed ? match[1..-2] : match[1..-1]
          encoder.text_token match[0,1], :operator
          encoder.text_token inner, :attribute_name unless inner.empty?
          encoder.text_token match[-1,1], :operator if closed
          next
        elsif match = scan(/@media/)
          encoder.text_token match, :directive
          states.push :media_before_name
          next
        end

      when :block
        if match = scan(/(?>#{RE::Ident})(?!\()/ox)
          if value_expected
            encoder.text_token match, :value
          else
            encoder.text_token match, :key
          end
          next
        end

      when :media_before_name
        if match = scan(RE::Ident)
          encoder.text_token match, :type
          states[-1] = :media_after_name
          next
        end

      when :media_after_name
        if match = scan(/\{/)
          encoder.text_token match, :operator
          states[-1] = :media
          next
        end

      else
        #:nocov:
        raise_inspect 'Unknown state', encoder
        #:nocov:

      end

    elsif match = scan(/\/\*(?:.*?\*\/|\z)/m)
      encoder.text_token match, :comment

    elsif match = scan(/\{/)
      value_expected = false
      encoder.text_token match, :operator
      states.push :block

    elsif match = scan(/\}/)
      value_expected = false
      # A closing brace is only valid when a block or media rule is open.
      if states.last == :block || states.last == :media
        encoder.text_token match, :operator
        states.pop
      else
        encoder.text_token match, :error
      end

    elsif match = scan(/#{RE::String}/o)
      encoder.begin_group :string
      encoder.text_token match[0, 1], :delimiter
      encoder.text_token match[1..-2], :content if match.size > 2
      encoder.text_token match[-1, 1], :delimiter if match.size >= 2
      encoder.end_group :string

    elsif match = scan(/#{RE::Function}/o)
      encoder.begin_group :string
      # The function name plus its opening parenthesis form the delimiter.
      start = match[/^\w+\(/]
      encoder.text_token start, :delimiter
      if match[-1] == ?)
        encoder.text_token match[start.size..-2], :content
        encoder.text_token ')', :delimiter
      else
        encoder.text_token match[start.size..-1], :content
      end
      encoder.end_group :string

    elsif match = scan(/(?: #{RE::Dimension} | #{RE::Percentage} | #{RE::Num} )/ox)
      encoder.text_token match, :float

    elsif match = scan(/#{RE::Color}/o)
      encoder.text_token match, :color

    elsif match = scan(/! *important/)
      encoder.text_token match, :important

    elsif match = scan(/(?:rgb|hsl)a?\([^()\n]*\)?/)
      encoder.text_token match, :color

    elsif match = scan(RE::AtKeyword)
      encoder.text_token match, :directive

    elsif match = scan(/ [+>:;,.=()\/] /x)
      if match == ':'
        value_expected = true
      elsif match == ';'
        value_expected = false
      end
      encoder.text_token match, :operator

    else
      encoder.text_token getch, :error

    end

  end

  encoder
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -0,0 +1,65 @@
|
|||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
# = Debug Scanner
|
||||
#
|
||||
# Interprets the output of the Encoders::Debug encoder.
|
||||
class Debug < Scanner
|
||||
|
||||
register_for :debug
|
||||
title 'CodeRay Token Dump Import'
|
||||
|
||||
protected
|
||||
|
||||
# Parses a token dump back into tokens and feeds them to +encoder+.
#
# Recognized input:
# * <tt>kind(text)</tt> - a text token; backslash escapes inside the
#   parentheses are unescaped. An unknown kind is reported as +:error+
#   together with the whole matched text.
# * <tt>kind<</tt> ... <tt>></tt> - a token group
# * <tt>kind[</tt> ... <tt>]</tt> - a line of tokens
#
# Anything else is passed through character by character as +:space+.
#
# +options+ is accepted but not read by this method.
# Returns +encoder+.
def scan_tokens encoder, options

  # Stack of [kind, opener] pairs; opener is :group or :line. Remembering
  # the opener lets the cleanup after the loop close each entry with the
  # call that matches how it was opened.
  opened_tokens = []

  until eos?

    if match = scan(/\s+/)
      encoder.text_token match, :space

    elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x)
      kind = self[1].to_sym
      match = self[2].gsub(/\\(.)/m, '\1')
      unless TokenKinds.has_key? kind
        kind = :error
        match = matched
      end
      encoder.text_token match, kind

    elsif match = scan(/ (\w+) ([<\[]) /x)
      kind = self[1].to_sym
      case self[2]
      when '<'
        opened_tokens << [kind, :group]
        encoder.begin_group kind
      when '['
        opened_tokens << [kind, :line]
        encoder.begin_line kind
      else
        raise 'CodeRay bug: This case should not be reached.'
      end

    elsif !opened_tokens.empty? && match = scan(/ > /x)
      encoder.end_group opened_tokens.pop.first

    elsif !opened_tokens.empty? && match = scan(/ \] /x)
      encoder.end_line opened_tokens.pop.first

    else
      encoder.text_token getch, :space

    end

  end

  # Close whatever is still open at the end of the input. Lines are ended
  # with end_line (they used to be ended with end_group as well).
  until opened_tokens.empty?
    kind, opener = opened_tokens.pop
    if opener == :line
      encoder.end_line kind
    else
      encoder.end_group kind
    end
  end

  encoder
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -0,0 +1,144 @@
|
|||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
# Scanner for the Delphi language (Object Pascal).
|
||||
#
|
||||
# Alias: +pascal+
|
||||
class Delphi < Scanner
|
||||
|
||||
register_for :delphi
|
||||
file_extension 'pas'
|
||||
|
||||
# Reserved words of the language. Each word is listed exactly once
# ('at' used to appear twice).
KEYWORDS = [
  'and', 'array', 'as', 'at', 'asm', 'begin', 'case', 'class',
  'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
  'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
  'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
  'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
  'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
  'procedure', 'program', 'property', 'raise', 'record', 'repeat',
  'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
  'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
  'xor', 'on',
]  # :nodoc:
|
||||
|
||||
DIRECTIVES = [
|
||||
'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
|
||||
'contains', 'deprecated', 'dispid', 'dynamic', 'export',
|
||||
'external', 'far', 'forward', 'implements', 'local',
|
||||
'near', 'nodefault', 'on', 'overload', 'override',
|
||||
'package', 'pascal', 'platform', 'private', 'protected', 'public',
|
||||
'published', 'read', 'readonly', 'register', 'reintroduce',
|
||||
'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
|
||||
'virtual', 'write', 'writeonly',
|
||||
] # :nodoc:
|
||||
|
||||
IDENT_KIND = WordList::CaseIgnoring.new(:ident).
|
||||
add(KEYWORDS, :keyword).
|
||||
add(DIRECTIVES, :directive) # :nodoc:
|
||||
|
||||
NAME_FOLLOWS = WordList::CaseIgnoring.new(false).
|
||||
add(%w(procedure function .)) # :nodoc:
|
||||
|
||||
protected
|
||||
|
||||
# Tokenizes Delphi source and feeds the tokens to +encoder+.
#
# Two states are used: +:initial+ for ordinary code and +:string+ for the
# inside of a single-quoted string literal. +last_token+ holds the text of
# the previous token that did not leave the iteration early with +next+;
# it is consulted to report the name after 'procedure', 'function' or '.'
# as a plain identifier.
#
# +options+ is accepted but not read by this method.
# Returns +encoder+.
def scan_tokens encoder, options

  state = :initial
  last_token = ''

  until eos?

    if state == :initial

      if match = scan(/ \s+ /x)
        encoder.text_token match, :space
        next

      elsif match = scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
        encoder.text_token match, :preprocessor
        next

      elsif match = scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
        encoder.text_token match, :comment
        next

      elsif match = scan(/ <[>=]? | >=? | :=? | [-+=*\/;,@\^|\(\)\[\]] | \.\. /x)
        encoder.text_token match, :operator

      elsif match = scan(/\./)
        encoder.text_token match, :operator
        # After 'end' the dot is not remembered as the last token.
        next if last_token == 'end'

      elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
        encoder.text_token match, NAME_FOLLOWS[last_token] ? :ident : IDENT_KIND[match]

      elsif match = skip(/ ' ( [^\n']|'' ) (?:'|$) /x)
        # Single-character literal. The pattern also accepts the end of the
        # line in place of the closing quote; a closing delimiter is only
        # emitted when one was really consumed, so that no quote missing
        # from the input is added to the output.
        closed = matched.size == self[1].size + 2
        encoder.begin_group :char
        encoder.text_token "'", :delimiter
        encoder.text_token self[1], :content
        encoder.text_token "'", :delimiter if closed
        encoder.end_group :char
        next

      elsif match = scan(/ ' /x)
        encoder.begin_group :string
        encoder.text_token match, :delimiter
        state = :string

      elsif match = scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
        encoder.text_token match, :char

      elsif match = scan(/ \$ [0-9A-Fa-f]+ /x)
        encoder.text_token match, :hex

      elsif match = scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
        encoder.text_token match, :integer

      elsif match = scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
        encoder.text_token match, :float

      else
        encoder.text_token getch, :error
        next

      end

    elsif state == :string
      if match = scan(/[^\n']+/)
        encoder.text_token match, :content
      elsif match = scan(/''/)
        # A doubled quote is an escaped quote inside the string.
        encoder.text_token match, :char
      elsif match = scan(/'/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        state = :initial
        next
      elsif match = scan(/\n/)
        # A line break ends the string literal.
        encoder.end_group :string
        encoder.text_token match, :space
        state = :initial
      else
        # raise_inspect is used, as in the other scanners; a plain +raise+
        # with a String and the encoder fails with a TypeError instead of
        # reporting the problem.
        raise_inspect "else case \' reached; %p not handled." % peek(1), encoder, state
      end

    else
      raise_inspect 'else-case reached', encoder, state

    end

    last_token = match

  end

  # Input ended inside a string literal: close the open group.
  if state == :string
    encoder.end_group state
  end

  encoder
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -0,0 +1,201 @@
|
|||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
# Scanner for output of the diff command.
|
||||
#
|
||||
# Alias: +patch+
|
||||
class Diff < Scanner
|
||||
|
||||
register_for :diff
|
||||
title 'diff output'
|
||||
|
||||
DEFAULT_OPTIONS = {
|
||||
:highlight_code => true,
|
||||
:inline_diff => true,
|
||||
}
|
||||
|
||||
protected
|
||||
|
||||
require 'coderay/helpers/file_type'
|
||||
|
||||
# Tokenizes diff output and feeds the tokens to +encoder+.
#
# Options read here:
# * +:highlight_code+ - tokenize changed and context lines with a scanner
#   chosen from the file name found in the diff header
# * +:inline_diff+ - for a single deleted line that is followed by a single
#   inserted line, mark the differing middle part as +:eyecatcher+
#
# +line_kind+ is the kind of the line currently open (nil when none is
# open); the line is closed at the next line break or at the end of the
# input. Returns +encoder+.
def scan_tokens encoder, options

  line_kind = nil
  state = :initial
  deleted_lines = 0
  # One content scanner per language, created on first use.
  scanners = Hash.new do |h, lang|
    h[lang] = Scanners[lang].new '', :keep_tokens => true, :keep_state => true
  end
  content_scanner = scanners[:plain]
  # Content scanner state saved at the first line of a run of deletions,
  # so that the inserted lines after the run can start from it again.
  content_scanner_entry_state = nil

  until eos?

    if match = scan(/\n/)
      deleted_lines = 0 unless line_kind == :delete
      if line_kind
        encoder.end_line line_kind
        line_kind = nil
      end
      encoder.text_token match, :space
      next
    end

    case state

    when :initial
      if match = scan(/--- |\+\+\+ |=+|_+/)
        encoder.begin_line line_kind = :head
        encoder.text_token match, :head
        if match = scan(/.*?(?=$|[\t\n\x00]| \(revision)/)
          encoder.text_token match, :filename
          if options[:highlight_code]
            # Pick the content scanner from the file name; a diff inside a
            # diff is treated as plain text.
            file_type = FileType.fetch(match, :text)
            file_type = :text if file_type == :diff
            content_scanner = scanners[file_type]
            content_scanner_entry_state = nil
          end
        end
        next unless match = scan(/.+/)
        encoder.text_token match, :plain
      elsif match = scan(/Index: |Property changes on: /)
        encoder.begin_line line_kind = :head
        encoder.text_token match, :head
        next unless match = scan(/.+/)
        encoder.text_token match, :plain
      elsif match = scan(/Added: /)
        encoder.begin_line line_kind = :head
        encoder.text_token match, :head
        next unless match = scan(/.+/)
        encoder.text_token match, :plain
        state = :added
      elsif match = scan(/\\ .*/)
        encoder.text_token match, :comment
      elsif match = scan(/@@(?>[^@\n]*)@@/)
        content_scanner.state = :initial unless match?(/\n\+/)
        content_scanner_entry_state = nil
        # A hunk header alone on its line is a :change line; with trailing
        # text, only the header itself is wrapped in a :change group.
        if check(/\n|$/)
          encoder.begin_line line_kind = :change
        else
          encoder.begin_group :change
        end
        encoder.text_token match[0,2], :change
        encoder.text_token match[2...-2], :plain
        encoder.text_token match[-2,2], :change
        encoder.end_group :change unless line_kind
        next unless match = scan(/.+/)
        if options[:highlight_code]
          content_scanner.tokenize match, :tokens => encoder
        else
          encoder.text_token match, :plain
        end
        next
      elsif match = scan(/\+/)
        encoder.begin_line line_kind = :insert
        encoder.text_token match, :insert
        next unless match = scan(/.+/)
        if options[:highlight_code]
          content_scanner.tokenize match, :tokens => encoder
        else
          encoder.text_token match, :plain
        end
        next
      elsif match = scan(/-/)
        deleted_lines += 1
        encoder.begin_line line_kind = :delete
        encoder.text_token match, :delete
        if options[:inline_diff] && deleted_lines == 1 && check(/(?>.*)\n\+(?>.*)$(?!\n\+)/)
          # Exactly one deleted line followed by exactly one inserted line:
          # consume both and highlight the part that differs.
          content_scanner_entry_state = content_scanner.state
          skip(/(.*)\n\+(.*)$/)
          head, deletion, insertion, tail = diff self[1], self[2]
          pre, deleted, post = content_scanner.tokenize [head, deletion, tail], :tokens => Tokens.new
          encoder.tokens pre
          unless deleted.empty?
            encoder.begin_group :eyecatcher
            encoder.tokens deleted
            encoder.end_group :eyecatcher
          end
          encoder.tokens post
          encoder.end_line line_kind
          encoder.text_token "\n", :space
          encoder.begin_line line_kind = :insert
          encoder.text_token '+', :insert
          # Tokenize the inserted line from the same scanner state the
          # deleted line started in.
          content_scanner.state = content_scanner_entry_state || :initial
          pre, inserted, post = content_scanner.tokenize [head, insertion, tail], :tokens => Tokens.new
          encoder.tokens pre
          unless inserted.empty?
            encoder.begin_group :eyecatcher
            encoder.tokens inserted
            encoder.end_group :eyecatcher
          end
          encoder.tokens post
        elsif match = scan(/.*/)
          if options[:highlight_code]
            if deleted_lines == 1
              content_scanner_entry_state = content_scanner.state
            end
            content_scanner.tokenize match, :tokens => encoder unless match.empty?
            # At the end of a run of deletions, rewind the content scanner
            # when inserted lines follow.
            if !match?(/\n-/)
              if match?(/\n\+/)
                content_scanner.state = content_scanner_entry_state || :initial
              end
              content_scanner_entry_state = nil
            end
          else
            encoder.text_token match, :plain
          end
        end
        next
      elsif match = scan(/ .*/)
        if options[:highlight_code]
          content_scanner.tokenize match, :tokens => encoder
        else
          encoder.text_token match, :plain
        end
        next
      elsif match = scan(/.+/)
        encoder.begin_line line_kind = :comment
        encoder.text_token match, :plain
      else
        # The encoder is passed along, as in every other raise_inspect call
        # of the scanners; the message typo ("rached") is fixed as well.
        raise_inspect 'else case reached', encoder
      end

    when :added
      if match = scan(/ \+/)
        encoder.begin_line line_kind = :insert
        encoder.text_token match, :insert
        next unless match = scan(/.+/)
        encoder.text_token match, :plain
      else
        # Not a property line: handle the input in the :initial state.
        state = :initial
        next
      end
    end

  end

  # Close a line that was still open when the input ended.
  encoder.end_line line_kind if line_kind

  encoder
end
|
||||
|
||||
private
|
||||
|
||||
# Splits two strings into the part they share at the start, the parts that
# differ, and the part they share at the end.
#
# Returns [common_prefix, middle_of_a, middle_of_b, common_suffix].
# The suffix never overlaps the prefix; it is '' when no common suffix is
# found.
def diff a, b
  # Count matching characters from the left, bounded by the shorter string.
  shortest = [a.size, b.size].min
  prefix_size = 0
  while prefix_size < shortest && a[prefix_size] == b[prefix_size]
    prefix_size += 1
  end
  # Walk backwards using negative indices; lowest is the most negative index
  # that may still be compared without running into the common prefix.
  lowest = prefix_size - shortest
  tail = -1
  while tail >= lowest && a[tail] == b[tail]
    tail -= 1
  end
  suffix = tail < -1 ? a[tail + 1..-1] : ''
  return a[0...prefix_size], a[prefix_size..tail], b[prefix_size..tail], suffix
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -0,0 +1,81 @@
|
|||
module CodeRay
module Scanners
  
  load :html
  load :ruby
  
  # Scanner for HTML ERB templates.
  #
  # Text outside of ERB tags is passed to an HTML scanner. The code inside an
  # ERB tag is passed to a Ruby scanner, except for <%# ... %> tags, whose
  # body is emitted as a single :comment token.
  class ERB < Scanner
    
    register_for :erb
    title 'HTML ERB Template'
    
    KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
    
    # Group 1 is the opening tag, group 2 the enclosed code and group 3 the
    # closing tag (empty when the block is not terminated).
    ERB_RUBY_BLOCK = /
      (<%(?!%)[-=\#]?)
      ((?>
        [^\-%]* # normal*
        (?> # special
          (?: %(?!>) | -(?!%>) )
          [^\-%]* # normal*
        )*
      ))
      ((?: -?%> )?)
    /x  # :nodoc:
    
    # An opening <% that is not the escaped form <%%.
    START_OF_ERB = /
      <%(?!%)
    /x  # :nodoc:
    
  protected
    
    # Creates the Ruby and HTML delegate scanners. The HTML scanner is created
    # with :keep_state so it can resume where it stopped before an ERB tag.
    def setup
      @ruby_scanner = CodeRay.scanner(:ruby, :tokens => @tokens, :keep_tokens => true)
      @html_scanner = CodeRay.scanner(:html, :tokens => @tokens, :keep_tokens => true, :keep_state => true)
    end
    
    # Resets this scanner and the HTML delegate.
    def reset_instance
      super
      @html_scanner.reset
    end
    
    # Alternates between HTML text and ERB blocks until the input is used up.
    # Returns the encoder.
    def scan_tokens encoder, options
      
      until eos?
        
        # Everything up to the next ERB tag, or the rest of the input if there
        # is none. Empty when an ERB tag starts right here.
        html = scan_until(/(?=#{START_OF_ERB})/o) || scan_rest
        
        if html && !html.empty?
          @html_scanner.tokenize html, :tokens => encoder
          
        elsif scan(/#{ERB_RUBY_BLOCK}/o)
          opening, code, closing = self[1], self[2], self[3]
          
          encoder.begin_group :inline
          encoder.text_token opening, :inline_delimiter
          
          unless code.empty?
            if opening == '<%#'
              encoder.text_token code, :comment
            else
              @ruby_scanner.tokenize code, :tokens => encoder
            end
          end
          
          encoder.text_token closing, :inline_delimiter unless closing.empty?
          encoder.end_group :inline
          
        else
          raise_inspect 'else-case reached!', encoder
          
        end
        
      end
      
      encoder
      
    end
    
  end
  
end
end
|
|
@ -0,0 +1,255 @@
|
|||
module CodeRay
module Scanners
  
  load :java
  
  # Scanner for Groovy.
  #
  # Extends the Java scanner with Groovy keywords, single-line and multiline
  # strings, slash-delimited regexps and $ident / ${...} interpolation inside
  # strings and regexps.
  class Groovy < Java
    
    register_for :groovy
    
    # TODO: check list of keywords
    GROOVY_KEYWORDS = %w[
      as assert def in
    ]  # :nodoc:
    # Keywords after which a value (and therefore possibly a /regexp/) follows.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case instanceof new return throw typeof while as assert in
    ]  # :nodoc:
    GROOVY_MAGIC_VARIABLES = %w[ it ]  # :nodoc:
    
    IDENT_KIND = Java::IDENT_KIND.dup.
      add(GROOVY_KEYWORDS, :keyword).
      add(GROOVY_MAGIC_VARIABLES, :local_variable)  # :nodoc:
    
    ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # :nodoc: no 4-byte unicode chars? U[a-fA-F0-9]{8}
    REGEXP_ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x  # :nodoc:
    
    # Longest run of ordinary content inside a literal, keyed by its opening
    # delimiter.
    # TODO: interpretation inside ', ", /
    STRING_CONTENT_PATTERN = {
      "'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
      '"' => /[^\\$"\n]+/,
      "'''" => /(?>[^\\']+|'(?!''))+/,
      '"""' => /(?>[^\\$"]+|"(?!""))+/,
      '/' => /[^\\$\/\n]+/,
    }  # :nodoc:
    
  protected
    
    # Tokenizes the input into encoder and returns the encoder.
    #
    # States: :initial (code), :string, :multiline_string and :regexp.
    # ${...} inside a literal pushes the current literal state onto
    # inline_block_stack and scans code until the matching closing brace.
    def scan_tokens encoder, options
      
      state = :initial
      inline_block_stack = []
      inline_block_paren_depth = nil
      string_delimiter = nil
      import_clause = class_name_follows = last_token = after_def = false
      value_expected = true
      
      until eos?
        
        case state
        
        when :initial
          
          if match = scan(/ \s+ | \\\n /x)
            encoder.text_token match, :space
            if match.index ?\n
              import_clause = after_def = false
              value_expected = true unless value_expected
            end
            next
          
          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            value_expected = true
            after_def = false
            encoder.text_token match, :comment
            # A comment must not replace last_token, otherwise the "after a
            # dot" and "after ?" checks below look at the comment text.
            next
          
          elsif bol? && match = scan(/ \#!.* /x)
            encoder.text_token match, :doctype
            # Same as for comments: keep last_token untouched.
            next
          
          elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
            after_def = value_expected = false
            encoder.text_token match, :include
          
          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            if last_token == '.'
              # member name: never a keyword or type
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            elsif after_def && check(/\s*[({]/)
              kind = :method
              after_def = false
            elsif kind == :ident && last_token != '?' && check(/:/)
              # name: value (but not the middle part of cond ? a : b)
              kind = :key
            else
              class_name_follows = true if match == 'class' || (import_clause && match == 'as')
              import_clause = match == 'import'
              after_def = true if match == 'def'
            end
            encoder.text_token match, kind
          
          elsif match = scan(/;/)
            import_clause = after_def = false
            value_expected = true
            encoder.text_token match, :operator
          
          elsif match = scan(/\{/)
            class_name_follows = after_def = false
            value_expected = true
            encoder.text_token match, :operator
            if !inline_block_stack.empty?
              # count braces opened inside a ${...} block
              inline_block_paren_depth += 1
            end
          
          # TODO: ~'...', ~"..." and ~/.../ style regexps
          elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
              && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
            value_expected = true
            value_expected = :regexp if match == '~'
            after_def = false
            encoder.text_token match, :operator
          
          elsif match = scan(/ [)\]}] /x)
            value_expected = after_def = false
            if !inline_block_stack.empty? && match == '}'
              inline_block_paren_depth -= 1
              if inline_block_paren_depth == 0  # closing brace of inline block reached
                encoder.text_token match, :inline_delimiter
                encoder.end_group :inline
                # resume the literal that contained the ${...} block
                state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
                next
              end
            end
            encoder.text_token match, :operator
          
          elsif check(/[\d.]/)
            after_def = value_expected = false
            if match = scan(/0[xX][0-9A-Fa-f]+/)
              encoder.text_token match, :hex
            elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
              encoder.text_token match, :octal
            elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              encoder.text_token match, :float
            elsif match = scan(/\d+[lLgG]?/)
              encoder.text_token match, :integer
            end
          
          elsif match = scan(/'''|"""/)
            after_def = value_expected = false
            state = :multiline_string
            encoder.begin_group :string
            string_delimiter = match
            encoder.text_token match, :delimiter
          
          # TODO: record.'name' syntax
          elsif match = scan(/["']/)
            after_def = value_expected = false
            # Only quotes can match here; slash-delimited regexps are handled
            # by the value_expected branch below.
            state = :string
            encoder.begin_group state
            string_delimiter = match
            encoder.text_token match, :delimiter
          
          elsif value_expected && match = scan(/\//)
            after_def = value_expected = false
            encoder.begin_group :regexp
            state = :regexp
            string_delimiter = '/'
            encoder.text_token match, :delimiter
          
          elsif match = scan(/ @ #{IDENT} /ox)
            after_def = value_expected = false
            encoder.text_token match, :annotation
          
          elsif match = scan(/\//)
            after_def = false
            value_expected = true
            encoder.text_token match, :operator
          
          else
            encoder.text_token getch, :error
          
          end
        
        when :string, :regexp, :multiline_string
          if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
            encoder.text_token match, :content
          
          elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
            encoder.text_token match, :delimiter
            if state == :regexp
              # TODO: regexp modifiers? s, m, x, i?
              modifiers = scan(/[ix]+/)
              encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
            end
            # multiline strings were opened as a :string group
            state = :string if state == :multiline_string
            encoder.end_group state
            string_delimiter = nil
            after_def = value_expected = false
            state = :initial
            next
          
          elsif (state == :string || state == :multiline_string) &&
              (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
              encoder.text_token match, :content
            else
              encoder.text_token match, :char
            end
          elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char
          
          elsif match = scan(/ \$ #{IDENT} /mox)
            encoder.begin_group :inline
            encoder.text_token '$', :inline_delimiter
            match = match[1..-1]
            encoder.text_token match, IDENT_KIND[match]
            encoder.end_group :inline
            next
          elsif match = scan(/ \$ \{ /x)
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            # remember the literal we are in; it is restored at the closing brace
            inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
            inline_block_paren_depth = 1
            state = :initial
            next
          
          elsif match = scan(/ \$ /mx)
            encoder.text_token match, :content
          
          elsif match = scan(/ \\. /mx)
            encoder.text_token match, :content  # TODO: Shouldn't this be :error?
          
          elsif match = scan(/ \\ | \n /x)
            # unterminated literal: close the group and flag the character
            encoder.end_group state
            encoder.text_token match, :error
            after_def = value_expected = false
            state = :initial
          
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
          
          end
        
        else
          raise_inspect 'Unknown state', encoder
        
        end
        
        # Whitespace, comments and the shebang line leave the loop iteration
        # early with next, so every token that gets here is remembered.
        last_token = match
        
      end
      
      if [:multiline_string, :string, :regexp].include? state
        encoder.end_group state
      end
      
      encoder
    end
    
  end
  
end
end
|
|
@ -0,0 +1,168 @@
|
|||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
load :ruby
|
||||
load :html
|
||||
load :java_script
|
||||
|
||||
# Scanner for HAML templates.
#
# Works line by line. Ruby code, HTML content and :javascript filter blocks
# are passed on to the Ruby, HTML and JavaScript scanners; the HAML markers
# themselves are emitted directly.
class HAML < Scanner
|
||||
|
||||
register_for :haml
|
||||
title 'HAML Template'
|
||||
|
||||
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
|
||||
|
||||
protected
|
||||
|
||||
# Creates the delegate scanners. @embedded_ruby_scanner is a second Ruby
# scanner that starts in the state returned by
# @ruby_scanner.interpreted_string_state.
def setup
|
||||
super
|
||||
@ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
|
||||
@embedded_ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true, :state => @ruby_scanner.interpreted_string_state
|
||||
@html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true
|
||||
end
|
||||
|
||||
# Tokenizes the template into encoder and returns the encoder.
# Each pass through the loop handles (the rest of) one line and then the
# newline that ends it.
def scan_tokens encoder, options
|
||||
|
||||
match = nil
|
||||
code = ''
|
||||
|
||||
until eos?
|
||||
|
||||
if bol?
|
||||
# !!! doctype line
if match = scan(/!!!.*/)
|
||||
encoder.text_token match, :doctype
|
||||
next
|
||||
end
|
||||
|
||||
# A line holding only a comment marker (/ or -#) or a filter name
# (:javascript, :ruby, :word). Group 1 is its indentation, group 2 the
# marker itself.
if match = scan(/(?>( *)(\/(?!\[if)|-\#|:javascript|:ruby|:\w+) *)(?=\n)/)
|
||||
encoder.text_token match, :comment
|
||||
|
||||
code = self[2]
|
||||
# The block belonging to the marker: all following lines that are
# indented by the marker's indentation plus at least one space.
if match = scan(/(?:\n+#{self[1]} .*)+/)
|
||||
case code
|
||||
when '/', '-#'
|
||||
encoder.text_token match, :comment
|
||||
when ':javascript'
|
||||
# TODO: recognize #{...} snippets inside JavaScript
|
||||
# The JavaScript scanner is created on first use.
@java_script_scanner ||= CodeRay.scanner :java_script, :tokens => @tokens, :keep_tokens => true
|
||||
@java_script_scanner.tokenize match, :tokens => encoder
|
||||
when ':ruby'
|
||||
@ruby_scanner.tokenize match, :tokens => encoder
|
||||
# Any other filter: the block is emitted as one :comment token.
when /:\w+/
|
||||
encoder.text_token match, :comment
|
||||
else
|
||||
raise 'else-case reached: %p' % [code]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# leading indentation
if match = scan(/ +/)
|
||||
encoder.text_token match, :space
|
||||
end
|
||||
|
||||
# / starts a comment that runs to the end of the line
if match = scan(/\/.*/)
|
||||
encoder.text_token match, :comment
|
||||
next
|
||||
end
|
||||
|
||||
# A leading backslash: the rest of the line goes to the HTML scanner.
if match = scan(/\\/)
|
||||
encoder.text_token match, :plain
|
||||
if match = scan(/.+/)
|
||||
@html_scanner.tokenize match, :tokens => encoder
|
||||
end
|
||||
next
|
||||
end
|
||||
|
||||
# tag becomes true once a %tag, .class or #id has been seen on this line;
# the attribute forms below are only tried when it is set.
tag = false
|
||||
|
||||
if match = scan(/%[\w:]+\/?/)
|
||||
encoder.text_token match, :tag
|
||||
# if match = scan(/( +)(.+)/)
|
||||
# encoder.text_token self[1], :space
|
||||
# @embedded_ruby_scanner.tokenize self[2], :tokens => encoder
|
||||
# end
|
||||
tag = true
|
||||
end
|
||||
|
||||
# .class and #id shorthands; ids are emitted as :constant, classes as :class
while match = scan(/([.#])[-\w]*\w/)
|
||||
encoder.text_token match, self[1] == '#' ? :constant : :class
|
||||
tag = true
|
||||
end
|
||||
|
||||
# Attributes in parentheses: the content is tokenized by the HTML scanner
# starting in its :attribute state. The closing parenthesis is optional.
if tag && match = scan(/(\()([^)]+)?(\))?/)
|
||||
# TODO: recognize title=@title, class="widget_#{@widget.number}"
|
||||
encoder.text_token self[1], :plain
|
||||
@html_scanner.tokenize self[2], :tokens => encoder, :state => :attribute if self[2]
|
||||
encoder.text_token self[3], :plain if self[3]
|
||||
end
|
||||
|
||||
# Attributes in braces: collect the code up to the matching closing brace
# (level counts nested braces) and tokenize it as Ruby.
if tag && match = scan(/\{/)
|
||||
encoder.text_token match, :plain
|
||||
|
||||
code = ''
|
||||
level = 1
|
||||
while true
|
||||
# Text without braces; a comma may be followed by a line break.
code << scan(/([^\{\},\n]|, *\n?)*/)
|
||||
case match = getch
|
||||
when '{'
|
||||
level += 1
|
||||
code << match
|
||||
when '}'
|
||||
level -= 1
|
||||
if level > 0
|
||||
code << match
|
||||
else
|
||||
break
|
||||
end
|
||||
# not closed on this line, or end of input
when "\n", ",", nil
|
||||
break
|
||||
end
|
||||
end
|
||||
@ruby_scanner.tokenize code, :tokens => encoder unless code.empty?
|
||||
|
||||
# match is the character that ended the loop (nil at end of input)
encoder.text_token match, :plain if match
|
||||
end
|
||||
|
||||
# Square brackets after a tag: the content is tokenized as Ruby.
if tag && match = scan(/(\[)([^\]\n]+)?(\])?/)
|
||||
encoder.text_token self[1], :plain
|
||||
@ruby_scanner.tokenize self[2], :tokens => encoder if self[2]
|
||||
encoder.text_token self[3], :plain if self[3]
|
||||
end
|
||||
|
||||
# slash directly after a tag
if tag && match = scan(/\//)
|
||||
encoder.text_token match, :tag
|
||||
end
|
||||
|
||||
# Ruby markers (-, =, &=, !=, ~, "& " or !, optionally prefixed with > and/or <)
# followed by code. Group 1 is the marker, group 3 the spaces after it and
# group 4 the code, which may continue on the next line after a comma or |.
# Group 2 is only set for "& " and !, which use the scanner that starts in
# interpreted-string state.
if scan(/(>?<?[-=]|[&!]=|(& |!)|~)( *)([^,\n\|]+(?:(, *|\|(?=.|\n.*\|$))\n?[^,\n\|]*)*)?/)
|
||||
encoder.text_token self[1] + self[3], :plain
|
||||
if self[4]
|
||||
if self[2]
|
||||
@embedded_ruby_scanner.tokenize self[4], :tokens => encoder
|
||||
else
|
||||
@ruby_scanner.tokenize self[4], :tokens => encoder
|
||||
end
|
||||
end
|
||||
# Otherwise the rest of the line is content for the HTML scanner, optionally
# preceded by <, > or >< (group 1) when that is not the start of markup.
elsif match = scan(/((?:<|><?)(?![!?\/\w]))?(.+)?/)
|
||||
encoder.text_token self[1], :plain if self[1]
|
||||
# TODO: recognize #{...} snippets
|
||||
@html_scanner.tokenize self[2], :tokens => encoder if self[2]
|
||||
end
|
||||
|
||||
# Not at the beginning of a line: pass the rest of the line to the HTML scanner.
elsif match = scan(/.+/)
|
||||
@html_scanner.tokenize match, :tokens => encoder
|
||||
|
||||
end
|
||||
|
||||
if match = scan(/\n/)
|
||||
encoder.text_token match, :space
|
||||
end
|
||||
end
|
||||
|
||||
encoder
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -0,0 +1,253 @@
|
|||
module CodeRay
module Scanners
  
  # HTML Scanner
  # 
  # Alias: +xhtml+
  # 
  # See also: Scanners::XML
  #
  # The scanner is a state machine over :initial, :attribute,
  # :attribute_equal, :attribute_value, :attribute_value_string and
  # :in_special_tag. The content of <script> elements and of event handler
  # attributes is passed to the JavaScript scanner.
  class HTML < Scanner
    
    register_for :html
    
    KINDS_NOT_LOC = [
      :comment, :doctype, :preprocessor,
      :tag, :attribute_name, :operator,
      :attribute_value, :string,
      :plain, :entity, :error,
    ]  # :nodoc:
    
    # Attributes whose quoted value is scanned as JavaScript.
    EVENT_ATTRIBUTES = %w(
      onabort onafterprint onbeforeprint onbeforeunload onblur oncanplay
      oncanplaythrough onchange onclick oncontextmenu oncuechange ondblclick
      ondrag ondragdrop ondragend ondragenter ondragleave ondragover
      ondragstart ondrop ondurationchange onemptied onended onerror onfocus
      onformchange onforminput onhashchange oninput oninvalid onkeydown
      onkeypress onkeyup onload onloadeddata onloadedmetadata onloadstart
      onmessage onmousedown onmousemove onmouseout onmouseover onmouseup
      onmousewheel onmove onoffline ononline onpagehide onpageshow onpause
      onplay onplaying onpopstate onprogress onratechange onreadystatechange
      onredo onreset onresize onscroll onseeked onseeking onselect onshow
      onstalled onstorage onsubmit onsuspend ontimeupdate onundo onunload
      onvolumechange onwaiting
    )
    
    # Maps an attribute name (ignoring case) to :script for event handlers
    # and to nil for everything else.
    IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).
      add(EVENT_ATTRIBUTES, :script)
    
    ATTR_NAME = /[\w.:-]+/  # :nodoc:
    TAG_END = /\/?>/  # :nodoc:
    HEX = /[0-9a-fA-F]/  # :nodoc:
    ENTITY = /
      &
      (?:
        \w+
      |
        \#
        (?:
          \d+
        |
          x#{HEX}+
        )
      )
      ;
    /ox  # :nodoc:
    
    # Ordinary content of a quoted attribute value, keyed by the quote used.
    PLAIN_STRING_CONTENT = {
      "'" => /[^&'>\n]+/,
      '"' => /[^&">\n]+/,
    }  # :nodoc:
    
    # Resets the scanner and puts the state machine back into :initial.
    def reset
      super
      @state = :initial
      @plain_string_content = nil
    end
    
  protected
    
    def setup
      @state = :initial
      @plain_string_content = nil
    end
    
    # Tokenizes code with a JavaScript scanner (created on first use) into
    # encoder. Does nothing when code is nil or empty.
    def scan_java_script encoder, code
      if code && !code.empty?
        @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
        # encoder.begin_group :inline
        @java_script_scanner.tokenize code, :tokens => encoder
        # encoder.end_group :inline
      end
    end
    
    # Tokenizes the input into encoder and returns the encoder.
    #
    # options[:state] selects the state to start in; without it the state
    # kept from the previous run is used. With options[:keep_state] the final
    # state is stored so the next run can continue from it.
    def scan_tokens encoder, options
      state = options[:state] || @state
      plain_string_content = @plain_string_content
      in_tag = in_attribute = nil
      
      # Continue the string group if the previous run ended inside a quoted
      # attribute value.
      encoder.begin_group :string if state == :attribute_value_string
      
      until eos?
        
        if state != :in_special_tag && match = scan(/\s+/m)
          encoder.text_token match, :space
          
        else
          
          case state
          
          when :initial
            if match = scan(/<!--(?:.*?-->|.*)/m)
              encoder.text_token match, :comment
            elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
              encoder.text_token match, :doctype
            elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
              encoder.text_token match, :preprocessor
            elsif match = scan(/<\?(?:.*?\?>|.*)/m)
              encoder.text_token match, :comment
            elsif match = scan(/<\/[-\w.:]*>?/m)
              in_tag = nil
              encoder.text_token match, :tag
            elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m)
              encoder.text_token match, :tag
              # in_tag is 'script' for a script tag and nil for any other tag
              in_tag = self[1]
              if self[2]
                # tag closed right away with >
                state = :in_special_tag if in_tag
              else
                state = :attribute
              end
            elsif match = scan(/[^<>&]+/)
              encoder.text_token match, :plain
            elsif match = scan(/#{ENTITY}/ox)
              encoder.text_token match, :entity
            elsif match = scan(/[<>&]/)
              in_tag = nil
              encoder.text_token match, :error
            else
              raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
            end
            
          when :attribute
            if match = scan(/#{TAG_END}/o)
              encoder.text_token match, :tag
              in_attribute = nil
              if in_tag
                state = :in_special_tag
              else
                state = :initial
              end
            elsif match = scan(/#{ATTR_NAME}/o)
              in_attribute = IN_ATTRIBUTE[match]
              encoder.text_token match, :attribute_name
              state = :attribute_equal
            else
              in_tag = nil
              encoder.text_token getch, :error
            end
            
          when :attribute_equal
            if match = scan(/=/)  #/
              encoder.text_token match, :operator
              state = :attribute_value
            elsif check(/#{ATTR_NAME}/o) || check(/#{TAG_END}/o)
              # Attribute without a value. Only look ahead here: the next
              # attribute name or the end of the tag must stay in the input so
              # that the :attribute state emits a token for it.
              state = :attribute
              next
            else
              encoder.text_token getch, :error
              state = :attribute
            end
            
          when :attribute_value
            if match = scan(/#{ATTR_NAME}/o)
              encoder.text_token match, :attribute_value
              state = :attribute
            elsif match = scan(/["']/)
              if in_attribute == :script
                # event handler: scan the quoted value as JavaScript
                encoder.begin_group :inline
                encoder.text_token match, :inline_delimiter
                if scan(/javascript:[ \t]*/)
                  encoder.text_token matched, :comment
                end
                code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
                scan_java_script encoder, code
                match = scan(/["']/)
                encoder.text_token match, :inline_delimiter if match
                encoder.end_group :inline
                state = :attribute
                in_attribute = nil
              else
                encoder.begin_group :string
                state = :attribute_value_string
                plain_string_content = PLAIN_STRING_CONTENT[match]
                encoder.text_token match, :delimiter
              end
            elsif match = scan(/#{TAG_END}/o)
              encoder.text_token match, :tag
              state = :initial
            else
              encoder.text_token getch, :error
            end
            
          when :attribute_value_string
            if match = scan(plain_string_content)
              encoder.text_token match, :content
            elsif match = scan(/['"]/)
              encoder.text_token match, :delimiter
              encoder.end_group :string
              state = :attribute
            elsif match = scan(/#{ENTITY}/ox)
              encoder.text_token match, :entity
            elsif match = scan(/&/)
              encoder.text_token match, :content
            elsif match = scan(/[\n>]/)
              # value not closed before the end of the line or tag
              encoder.end_group :string
              state = :initial
              encoder.text_token match, :error
            end
            
          when :in_special_tag
            case in_tag
            when 'script'
              encoder.text_token match, :space if match = scan(/[ \t]*\n/)
              # The script may be wrapped in an HTML comment; the comment
              # markers are emitted as :comment, the content as JavaScript.
              if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
                code = self[2] || self[4]
                closing = self[3]
                encoder.text_token self[1], :comment
              else
                code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
                closing = false
              end
              unless code.empty?
                encoder.begin_group :inline
                scan_java_script encoder, code
                encoder.end_group :inline
              end
              encoder.text_token closing, :comment if closing
              state = :initial
            else
              raise 'unknown special tag: %p' % [in_tag]
            end
            
          else
            raise_inspect 'Unknown state: %p' % [state], encoder
            
          end
          
        end
        
      end
      
      if options[:keep_state]
        @state = state
        @plain_string_content = plain_string_content
      end
      
      encoder.end_group :string if state == :attribute_value_string
      
      encoder
    end
    
  end
  
end
end
|
|
@ -0,0 +1,174 @@
|
|||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
# Scanner for Java.
#
# A state machine with two states: :initial for code and :string for the
# inside of a quoted literal.
|
||||
class Java < Scanner
|
||||
|
||||
register_for :java
|
||||
|
||||
# Provides BuiltinTypes::List, which IDENT_KIND below is built from.
autoload :BuiltinTypes, 'coderay/scanners/java/builtin_types'
|
||||
|
||||
# http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
# NOTE(review): typeof, debugger and export are not Java keywords; they look
# carried over from a JavaScript word list - confirm whether intentional.
|
||||
KEYWORDS = %w[
|
||||
assert break case catch continue default do else
|
||||
finally for if instanceof import new package
|
||||
return switch throw try typeof while
|
||||
debugger export
|
||||
] # :nodoc:
|
||||
RESERVED = %w[ const goto ] # :nodoc:
|
||||
CONSTANTS = %w[ false null true ] # :nodoc:
|
||||
MAGIC_VARIABLES = %w[ this super ] # :nodoc:
|
||||
TYPES = %w[
|
||||
boolean byte char class double enum float int interface long
|
||||
short void
|
||||
] << '[]' # :nodoc: because int[] should be highlighted as a type
|
||||
DIRECTIVES = %w[
|
||||
abstract extends final implements native private protected public
|
||||
static strictfp synchronized throws transient volatile
|
||||
] # :nodoc:
|
||||
|
||||
# Maps a word to its token kind; words not listed are :ident.
# Built-in types ending in Error or Exception are added a second time as
# :exception - presumably the later add wins; confirm against WordList#add.
IDENT_KIND = WordList.new(:ident).
|
||||
add(KEYWORDS, :keyword).
|
||||
add(RESERVED, :reserved).
|
||||
add(CONSTANTS, :predefined_constant).
|
||||
add(MAGIC_VARIABLES, :local_variable).
|
||||
add(TYPES, :type).
|
||||
add(BuiltinTypes::List, :predefined_type).
|
||||
add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
|
||||
add(DIRECTIVES, :directive) # :nodoc:
|
||||
|
||||
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
|
||||
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
|
||||
# Longest run of ordinary characters inside a literal, keyed by the
# delimiter that opened it.
STRING_CONTENT_PATTERN = {
|
||||
"'" => /[^\\']+/,
|
||||
'"' => /[^\\"]+/,
|
||||
'/' => /[^\\\/]+/,
|
||||
} # :nodoc:
|
||||
IDENT = /[a-zA-Z_][A-Za-z_0-9]*/ # :nodoc:
|
||||
|
||||
protected
|
||||
|
||||
# Tokenizes the input into encoder and returns the encoder.
def scan_tokens encoder, options
|
||||
|
||||
state = :initial
|
||||
string_delimiter = nil
|
||||
# false, or the token kind (:include or :namespace) to use for the dotted
# name that follows import or package
package_name_expected = false
|
||||
class_name_follows = false
|
||||
last_token_dot = false
|
||||
|
||||
until eos?
|
||||
|
||||
case state
|
||||
|
||||
when :initial
|
||||
|
||||
if match = scan(/ \s+ | \\\n /x)
|
||||
encoder.text_token match, :space
|
||||
# next skips the last_token_dot update at the end of the loop
next
|
||||
|
||||
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
||||
encoder.text_token match, :comment
|
||||
next
|
||||
|
||||
elsif package_name_expected && match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
|
||||
encoder.text_token match, package_name_expected
|
||||
|
||||
elsif match = scan(/ #{IDENT} | \[\] /ox)
|
||||
kind = IDENT_KIND[match]
|
||||
if last_token_dot
|
||||
# a word directly after a dot is always emitted as :ident
kind = :ident
|
||||
elsif class_name_follows
|
||||
kind = :class
|
||||
class_name_follows = false
|
||||
else
|
||||
case match
|
||||
when 'import'
|
||||
package_name_expected = :include
|
||||
when 'package'
|
||||
package_name_expected = :namespace
|
||||
when 'class', 'interface'
|
||||
class_name_follows = true
|
||||
end
|
||||
end
|
||||
encoder.text_token match, kind
|
||||
|
||||
elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
|
||||
encoder.text_token match, :operator
|
||||
|
||||
# a semicolon ends an import or package clause
elsif match = scan(/;/)
|
||||
package_name_expected = false
|
||||
encoder.text_token match, :operator
|
||||
|
||||
elsif match = scan(/\{/)
|
||||
class_name_follows = false
|
||||
encoder.text_token match, :operator
|
||||
|
||||
# Numbers. A dot that is not followed by a digit was already consumed by
# the operator branch above.
elsif check(/[\d.]/)
|
||||
if match = scan(/0[xX][0-9A-Fa-f]+/)
|
||||
encoder.text_token match, :hex
|
||||
elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
|
||||
encoder.text_token match, :octal
|
||||
elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
|
||||
encoder.text_token match, :float
|
||||
elsif match = scan(/\d+[lL]?/)
|
||||
encoder.text_token match, :integer
|
||||
end
|
||||
|
||||
# Both quote characters open a :string group.
elsif match = scan(/["']/)
|
||||
state = :string
|
||||
encoder.begin_group state
|
||||
string_delimiter = match
|
||||
encoder.text_token match, :delimiter
|
||||
|
||||
elsif match = scan(/ @ #{IDENT} /ox)
|
||||
encoder.text_token match, :annotation
|
||||
|
||||
else
|
||||
encoder.text_token getch, :error
|
||||
|
||||
end
|
||||
|
||||
when :string
|
||||
if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
|
||||
encoder.text_token match, :content
|
||||
elsif match = scan(/["'\/]/)
|
||||
encoder.text_token match, :delimiter
|
||||
encoder.end_group state
|
||||
state = :initial
|
||||
string_delimiter = nil
|
||||
elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
|
||||
# Inside '...' only \\ and \' are emitted as :char; other escapes are
# emitted as :content.
if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
|
||||
encoder.text_token match, :content
|
||||
else
|
||||
encoder.text_token match, :char
|
||||
end
|
||||
# a backslash followed by any other character
elsif match = scan(/\\./m)
|
||||
encoder.text_token match, :content
|
||||
# a backslash with nothing after it (or an end of line): close the group
elsif match = scan(/ \\ | $ /x)
|
||||
encoder.end_group state
|
||||
state = :initial
|
||||
encoder.text_token match, :error
|
||||
else
|
||||
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
|
||||
end
|
||||
|
||||
else
|
||||
raise_inspect 'Unknown state', encoder
|
||||
|
||||
end
|
||||
|
||||
# remembered for the next identifier (see last_token_dot above)
last_token_dot = match == '.'
|
||||
|
||||
end
|
||||
|
||||
# close a literal that is still open at the end of the input
if state == :string
|
||||
encoder.end_group state
|
||||
end
|
||||
|
||||
encoder
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -0,0 +1,213 @@
|
|||
module CodeRay
module Scanners

  # Scanner for JavaScript.
  #
  # Aliases: +ecmascript+, +ecma_script+, +javascript+
  class JavaScript < Scanner

    register_for :java_script
    file_extension 'js'

    # The actual JavaScript keywords (classified as :keyword in IDENT_KIND).
    KEYWORDS = %w[
      break case catch continue default delete do else
      finally for function if in instanceof new
      return switch throw try typeof var void while with
    ]  # :nodoc:

    # Names classified as :predefined_constant in IDENT_KIND.
    PREDEFINED_CONSTANTS = %w[
      false null true undefined NaN Infinity
    ]  # :nodoc:

    # Names classified as :local_variable in IDENT_KIND.
    MAGIC_VARIABLES = %w[ this arguments ]  # :nodoc: arguments was introduced in JavaScript 1.4

    # Keywords after which the scanner expects a value (so a following "/"
    # starts a regexp literal rather than a division).
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case delete in instanceof new return throw typeof with
    ]  # :nodoc:

    # Reserved for future use.
    RESERVED_WORDS = %w[
      abstract boolean byte char class debugger double enum export extends
      final float goto implements import int interface long native package
      private protected public short static super synchronized throws transient
      volatile
    ]  # :nodoc:

    # Identifier => token kind lookup; :ident is the value handed to
    # WordList.new (presumably what unlisted words yield - see WordList).
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_CONSTANTS, :predefined_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(KEYWORDS, :keyword)  # :nodoc:

    # What may follow a backslash inside string / regexp literals.
    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
    REGEXP_ESCAPE = / [bBdDsSwW] /x  # :nodoc:

    # Plain content of a literal, keyed by its opening delimiter.
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }  # :nodoc:

    # Look-ahead used right after an opening quote: the rest of the string,
    # its closing quote, optional whitespace and a colon (an object key).
    KEY_CHECK_PATTERN = {
      "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
      '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
    }  # :nodoc:

  protected

    # Walks the input with a small state machine (:initial plus the literal
    # states :string, :regexp and :key) and sends every token to +encoder+.
    # +options+ is not read here. Returns +encoder+.
    def scan_tokens encoder, options

      state = :initial
      string_delimiter = nil
      value_expected = true       # decides whether "/" opens a regexp or divides
      key_expected = false        # set after "{" and ","
      function_expected = false   # set right after the +function+ keyword

      until eos?

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            # a line break lets a value start on the following line
            value_expected ||= true if match.index(?\n)
            encoder.text_token match, :space

          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            value_expected = true
            encoder.text_token match, :comment

          elsif check(/\.?\d/)
            key_expected = value_expected = false
            number_kind = if match = scan(/0[xX][0-9A-Fa-f]+/)
              :hex
            elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
              :octal
            elsif match = scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
              :float
            elsif match = scan(/\d+/)
              :integer
            end
            encoder.text_token match, number_kind if number_kind

          elsif value_expected && (match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim))
            # TODO: scan over nested tags
            xml_scanner.tokenize match, :tokens => encoder
            value_expected = false
            next

          elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
            value_expected = true
            last_operator = match[-1]
            key_expected = (last_operator == ?{) || (last_operator == ?,)
            function_expected = false
            encoder.text_token match, :operator

          elsif match = scan(/ [)\]}]+ /x)
            function_expected = key_expected = value_expected = false
            encoder.text_token match, :operator

          elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            # TODO: labels
            if kind == :ident
              # refine a plain identifier using its spelling and the context
              kind = if match.index(?$)  # $ allowed inside an identifier
                :predefined
              elsif function_expected || check(/\s*[=:]\s*function\b/)
                :function
              elsif key_expected && check(/\s*:/)
                :key
              else
                :ident
              end
            end
            function_expected = (kind == :keyword) && (match == 'function')
            key_expected = false
            encoder.text_token match, kind

          elsif match = scan(/["']/)
            state = (key_expected && check(KEY_CHECK_PATTERN[match])) ? :key : :string
            encoder.begin_group state
            string_delimiter = match
            encoder.text_token match, :delimiter

          elsif value_expected && (match = scan(/\//))
            encoder.begin_group :regexp
            state = :regexp
            string_delimiter = '/'
            encoder.text_token match, :delimiter

          elsif match = scan(/ \/ /x)
            value_expected = true
            key_expected = false
            encoder.text_token match, :operator

          else
            encoder.text_token getch, :error

          end

        when :string, :regexp, :key
          if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
            encoder.text_token match, :content
          elsif match = scan(/["'\/]/)
            # closing delimiter; a regexp may carry modifiers after it
            encoder.text_token match, :delimiter
            if state == :regexp
              modifiers = scan(/[gim]+/)
              encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
            end
            encoder.end_group state
            string_delimiter = nil
            key_expected = value_expected = false
            state = :initial
          elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # in '...' literals only \\ and \' are reported as :char
            plain = string_delimiter == "'" && match != "\\\\" && match != "\\'"
            encoder.text_token match, (plain ? :content : :char)
          elsif state == :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            encoder.text_token match, :char
          elsif match = scan(/\\./m)
            encoder.text_token match, :content
          elsif match = scan(/ \\ | $ /x)
            # nothing else matched: give up on the literal and flag an error
            encoder.end_group state
            encoder.text_token match, :error
            key_expected = value_expected = false
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
          end

        else
          raise_inspect 'Unknown state', encoder

        end

      end

      # close a string or regexp literal that was still open at end of input
      encoder.end_group state if state == :string || state == :regexp

      encoder
    end

  protected

    # Also resets the nested XML scanner, if one has been created.
    def reset_instance
      super
      @xml_scanner.reset if defined? @xml_scanner
    end

    # Lazily created scanner for XML literals embedded in the script.
    def xml_scanner
      @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => false
    end

  end

end
end
|
|
@ -0,0 +1,95 @@
|
|||
module CodeRay
module Scanners

  # Scanner for JSON (JavaScript Object Notation).
  class JSON < Scanner

    register_for :json
    file_extension 'json'

    KINDS_NOT_LOC = [
      :float, :char, :content, :delimiter,
      :error, :integer, :operator, :value,
    ]  # :nodoc:

    # What may follow a backslash inside a JSON string.
    ESCAPE = / [bfnrt\\"\/] /x  # :nodoc:
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # :nodoc:

  protected

    # See http://json.org/ for a definition of the JSON lexic/grammar.
    #
    # Sends every token of the input to +encoder+ and returns +encoder+.
    # +options+ is not read here.
    def scan_tokens encoder, options

      state = :initial
      nesting = []          # :object / :array for every container still open
      key_expected = false  # true where the next string is an object key

      until eos?

        case state

        when :initial
          if match = scan(/ \s+ /x)
            encoder.text_token match, :space
          elsif match = scan(/"/)
            state = key_expected ? :key : :string
            encoder.begin_group state
            encoder.text_token match, :delimiter
          elsif match = scan(/ [:,\[{\]}] /x)
            encoder.text_token match, :operator
            case match
            when ':'
              key_expected = false
            when ','
              key_expected = true if nesting.last == :object
            when '{'
              nesting << :object
              key_expected = true
            when '['
              nesting << :array
            when '}', ']'
              nesting.pop  # no error recovery, but works for valid JSON
            end
          elsif match = scan(/ true | false | null /x)
            encoder.text_token match, :value
          elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
            # a fraction and/or exponent right behind the digits makes it a float
            if fraction = scan(/ \.\d+ (?:[eE][-+]?\d+)? | [eE][-+]? \d+ /x)
              match << fraction
              encoder.text_token match, :float
            else
              encoder.text_token match, :integer
            end
          else
            encoder.text_token getch, :error
          end

        when :string, :key
          if match = scan(/[^\\"]+/)
            encoder.text_token match, :content
          elsif match = scan(/"/)
            encoder.text_token match, :delimiter
            encoder.end_group state
            state = :initial
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char
          elsif match = scan(/\\./m)
            encoder.text_token match, :content
          elsif match = scan(/ \\ | $ /x)
            # nothing else matched: give up on the string and flag an error
            encoder.end_group state
            encoder.text_token match, :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
          end

        else
          raise_inspect 'Unknown state: %p' % [state], encoder

        end
      end

      # close a string or key that was still open at end of input
      encoder.end_group state if state == :string || state == :key

      encoder
    end

  end

end
end
|
|
@ -0,0 +1,509 @@
|
|||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
load :html
|
||||
|
||||
# Scanner for PHP.
|
||||
#
|
||||
# Original by Stefan Walk.
|
||||
class PHP < Scanner
|
||||
|
||||
register_for :php
|
||||
file_extension 'php'
|
||||
encoding 'BINARY'
|
||||
|
||||
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
|
||||
|
||||
protected
|
||||
|
||||
# Creates the HTML scanner that scan_tokens hands the non-PHP parts of the
# input to. It is given this scanner's @tokens and is told to keep both its
# tokens and its state between calls.
def setup
  @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
end
|
||||
|
||||
# Resets this scanner (via super) and the nested HTML scanner with it.
def reset_instance
  super
  @html_scanner.reset
end
|
||||
|
||||
module Words # :nodoc:
|
||||
|
||||
# according to http://www.php.net/manual/en/reserved.keywords.php
# (classified as :keyword in IDENT_KIND)
KEYWORDS = %w[
  abstract and array as break case catch class clone const continue declare default do else elseif
  enddeclare endfor endforeach endif endswitch endwhile extends final for foreach function global
  goto if implements interface instanceof namespace new or private protected public static switch
  throw try use var while xor
  cfunction old_function
]

# Type names (classified as :predefined_type in IDENT_KIND).
TYPES = %w[ int integer float double bool boolean string array object resource ]

# Language constructs (classified as :keyword in IDENT_KIND, like KEYWORDS).
LANGUAGE_CONSTRUCTS = %w[
  die echo empty exit eval include include_once isset list
  require require_once return print unset
]

# Predefined classes (classified as :predefined_constant in IDENT_KIND).
CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ]
|
||||
|
||||
# according to http://php.net/quickref.php on 2009-04-21;
|
||||
# all functions with _ excluded (module functions) and selected additional functions
|
||||
# Built-in function names (classified as :predefined in IDENT_KIND).
# One name per word; get_required_files and get_resource_type are two
# separate functions and must stay separated by whitespace.
BUILTIN_FUNCTIONS = %w[
  abs acos acosh addcslashes addslashes aggregate array arsort ascii2ebcdic asin asinh asort assert atan atan2
  atanh basename bcadd bccomp bcdiv bcmod bcmul bcpow bcpowmod bcscale bcsqrt bcsub bin2hex bindec
  bindtextdomain bzclose bzcompress bzdecompress bzerrno bzerror bzerrstr bzflush bzopen bzread bzwrite
  calculhmac ceil chdir checkdate checkdnsrr chgrp chmod chop chown chr chroot clearstatcache closedir closelog
  compact constant copy cos cosh count crc32 crypt current date dcgettext dcngettext deaggregate decbin dechex
  decoct define defined deg2rad delete dgettext die dirname diskfreespace dl dngettext doubleval each
  ebcdic2ascii echo empty end ereg eregi escapeshellarg escapeshellcmd eval exec exit exp explode expm1 extract
  fclose feof fflush fgetc fgetcsv fgets fgetss file fileatime filectime filegroup fileinode filemtime fileowner
  fileperms filepro filesize filetype floatval flock floor flush fmod fnmatch fopen fpassthru fprintf fputcsv
  fputs fread frenchtojd fscanf fseek fsockopen fstat ftell ftok ftruncate fwrite getallheaders getcwd getdate
  getenv gethostbyaddr gethostbyname gethostbynamel getimagesize getlastmod getmxrr getmygid getmyinode getmypid
  getmyuid getopt getprotobyname getprotobynumber getrandmax getrusage getservbyname getservbyport gettext
  gettimeofday gettype glob gmdate gmmktime gmstrftime gregoriantojd gzclose gzcompress gzdecode gzdeflate
  gzencode gzeof gzfile gzgetc gzgets gzgetss gzinflate gzopen gzpassthru gzputs gzread gzrewind gzseek gztell
  gzuncompress gzwrite hash header hebrev hebrevc hexdec htmlentities htmlspecialchars hypot iconv idate
  implode include intval ip2long iptcembed iptcparse isset
  jddayofweek jdmonthname jdtofrench jdtogregorian jdtojewish jdtojulian jdtounix jewishtojd join jpeg2wbmp
  juliantojd key krsort ksort lcfirst lchgrp lchown levenshtein link linkinfo list localeconv localtime log
  log10 log1p long2ip lstat ltrim mail main max md5 metaphone mhash microtime min mkdir mktime msql natcasesort
  natsort next ngettext nl2br nthmac octdec opendir openlog
  ord overload pack passthru pathinfo pclose pfsockopen phpcredits phpinfo phpversion pi png2wbmp popen pos pow
  prev print printf putenv quotemeta rad2deg rand range rawurldecode rawurlencode readdir readfile readgzfile
  readline readlink realpath recode rename require reset rewind rewinddir rmdir round rsort rtrim scandir
  serialize setcookie setlocale setrawcookie settype sha1 shuffle signeurlpaiement sin sinh sizeof sleep snmpget
  snmpgetnext snmprealwalk snmpset snmpwalk snmpwalkoid sort soundex split spliti sprintf sqrt srand sscanf stat
  strcasecmp strchr strcmp strcoll strcspn strftime stripcslashes stripos stripslashes stristr strlen
  strnatcasecmp strnatcmp strncasecmp strncmp strpbrk strpos strptime strrchr strrev strripos strrpos strspn
  strstr strtok strtolower strtotime strtoupper strtr strval substr symlink syslog system tan tanh tempnam
  textdomain time tmpfile touch trim uasort ucfirst ucwords uksort umask uniqid unixtojd unlink unpack
  unserialize unset urldecode urlencode usleep usort vfprintf virtual vprintf vsprintf wordwrap
  array_change_key_case array_chunk array_combine array_count_values array_diff array_diff_assoc
  array_diff_key array_diff_uassoc array_diff_ukey array_fill array_fill_keys array_filter array_flip
  array_intersect array_intersect_assoc array_intersect_key array_intersect_uassoc array_intersect_ukey
  array_key_exists array_keys array_map array_merge array_merge_recursive array_multisort array_pad
  array_pop array_product array_push array_rand array_reduce array_reverse array_search array_shift
  array_slice array_splice array_sum array_udiff array_udiff_assoc array_udiff_uassoc array_uintersect
  array_uintersect_assoc array_uintersect_uassoc array_unique array_unshift array_values array_walk
  array_walk_recursive
  assert_options base_convert base64_decode base64_encode
  chunk_split class_exists class_implements class_parents
  count_chars debug_backtrace debug_print_backtrace debug_zval_dump
  error_get_last error_log error_reporting extension_loaded
  file_exists file_get_contents file_put_contents load_file
  func_get_arg func_get_args func_num_args function_exists
  get_browser get_called_class get_cfg_var get_class get_class_methods get_class_vars
  get_current_user get_declared_classes get_declared_interfaces get_defined_constants
  get_defined_functions get_defined_vars get_extension_funcs get_headers get_html_translation_table
  get_include_path get_included_files get_loaded_extensions get_magic_quotes_gpc get_magic_quotes_runtime
  get_meta_tags get_object_vars get_parent_class get_required_files get_resource_type
  gc_collect_cycles gc_disable gc_enable gc_enabled
  halt_compiler headers_list headers_sent highlight_file highlight_string
  html_entity_decode htmlspecialchars_decode
  in_array include_once inclued_get_data
  is_a is_array is_binary is_bool is_buffer is_callable is_dir is_double is_executable is_file is_finite
  is_float is_infinite is_int is_integer is_link is_long is_nan is_null is_numeric is_object is_readable
  is_real is_resource is_scalar is_soap_fault is_string is_subclass_of is_unicode is_uploaded_file
  is_writable is_writeable
  locale_get_default locale_set_default
  number_format override_function parse_str parse_url
  php_check_syntax php_ini_loaded_file php_ini_scanned_files php_logo_guid php_sapi_name
  php_strip_whitespace php_uname
  preg_filter preg_grep preg_last_error preg_match preg_match_all preg_quote preg_replace
  preg_replace_callback preg_split print_r
  require_once register_shutdown_function register_tick_function
  set_error_handler set_exception_handler set_file_buffer set_include_path
  set_magic_quotes_runtime set_time_limit shell_exec
  str_getcsv str_ireplace str_pad str_repeat str_replace str_rot13 str_shuffle str_split str_word_count
  strip_tags substr_compare substr_count substr_replace
  time_nanosleep time_sleep_until
  token_get_all token_name trigger_error
  unregister_tick_function use_soap_error_handler user_error
  utf8_decode utf8_encode var_dump var_export
  version_compare
  zend_logo_guid zend_thread_id zend_version
  create_function call_user_func_array
  posix_access posix_ctermid posix_get_last_error posix_getcwd posix_getegid
  posix_geteuid posix_getgid posix_getgrgid posix_getgrnam posix_getgroups
  posix_getlogin posix_getpgid posix_getpgrp posix_getpid posix_getppid
  posix_getpwnam posix_getpwuid posix_getrlimit posix_getsid posix_getuid
  posix_initgroups posix_isatty posix_kill posix_mkfifo posix_mknod
  posix_setegid posix_seteuid posix_setgid posix_setpgid posix_setsid
  posix_setuid posix_strerror posix_times posix_ttyname posix_uname
  pcntl_alarm pcntl_exec pcntl_fork pcntl_getpriority pcntl_setpriority
  pcntl_signal pcntl_signal_dispatch pcntl_sigprocmask pcntl_sigtimedwait
  pcntl_sigwaitinfo pcntl_wait pcntl_waitpid pcntl_wexitstatus pcntl_wifexited
  pcntl_wifsignaled pcntl_wifstopped pcntl_wstopsig pcntl_wtermsig
]
|
||||
# TODO: more built-in PHP functions?
|
||||
|
||||
# Error-level constants (classified as :exception in IDENT_KIND).
EXCEPTIONS = %w[
  E_ERROR E_WARNING E_PARSE E_NOTICE E_CORE_ERROR E_CORE_WARNING E_COMPILE_ERROR E_COMPILE_WARNING
  E_USER_ERROR E_USER_WARNING E_USER_NOTICE E_DEPRECATED E_USER_DEPRECATED E_ALL E_STRICT
]

# Predefined and magic constants (classified as :predefined_constant in
# IDENT_KIND). Each name is listed exactly once.
CONSTANTS = %w[
  null true false self parent
  __LINE__ __DIR__ __FILE__
  __CLASS__ __NAMESPACE__ __METHOD__ __FUNCTION__
  PHP_VERSION PHP_MAJOR_VERSION PHP_MINOR_VERSION PHP_RELEASE_VERSION PHP_VERSION_ID PHP_EXTRA_VERSION PHP_ZTS
  PHP_DEBUG PHP_MAXPATHLEN PHP_OS PHP_SAPI PHP_EOL PHP_INT_MAX PHP_INT_SIZE DEFAULT_INCLUDE_PATH
  PEAR_INSTALL_DIR PEAR_EXTENSION_DIR PHP_EXTENSION_DIR PHP_PREFIX PHP_BINDIR PHP_LIBDIR PHP_DATADIR
  PHP_SYSCONFDIR PHP_LOCALSTATEDIR PHP_CONFIG_FILE_PATH PHP_CONFIG_FILE_SCAN_DIR PHP_SHLIB_SUFFIX
  PHP_OUTPUT_HANDLER_START PHP_OUTPUT_HANDLER_CONT PHP_OUTPUT_HANDLER_END
  __COMPILER_HALT_OFFSET__
  EXTR_OVERWRITE EXTR_SKIP EXTR_PREFIX_SAME EXTR_PREFIX_ALL EXTR_PREFIX_INVALID EXTR_PREFIX_IF_EXISTS
  EXTR_IF_EXISTS SORT_ASC SORT_DESC SORT_REGULAR SORT_NUMERIC SORT_STRING CASE_LOWER CASE_UPPER COUNT_NORMAL
  COUNT_RECURSIVE ASSERT_ACTIVE ASSERT_CALLBACK ASSERT_BAIL ASSERT_WARNING ASSERT_QUIET_EVAL CONNECTION_ABORTED
  CONNECTION_NORMAL CONNECTION_TIMEOUT INI_USER INI_PERDIR INI_SYSTEM INI_ALL M_E M_LOG2E M_LOG10E M_LN2 M_LN10
  M_PI M_PI_2 M_PI_4 M_1_PI M_2_PI M_2_SQRTPI M_SQRT2 M_SQRT1_2 CRYPT_SALT_LENGTH CRYPT_STD_DES CRYPT_EXT_DES
  CRYPT_MD5 CRYPT_BLOWFISH DIRECTORY_SEPARATOR SEEK_SET SEEK_CUR SEEK_END LOCK_SH LOCK_EX LOCK_UN LOCK_NB
  HTML_SPECIALCHARS HTML_ENTITIES ENT_COMPAT ENT_QUOTES ENT_NOQUOTES INFO_GENERAL INFO_CREDITS
  INFO_CONFIGURATION INFO_MODULES INFO_ENVIRONMENT INFO_VARIABLES INFO_LICENSE INFO_ALL CREDITS_GROUP
  CREDITS_GENERAL CREDITS_SAPI CREDITS_MODULES CREDITS_DOCS CREDITS_FULLPAGE CREDITS_QA CREDITS_ALL STR_PAD_LEFT
  STR_PAD_RIGHT STR_PAD_BOTH PATHINFO_DIRNAME PATHINFO_BASENAME PATHINFO_EXTENSION PATH_SEPARATOR CHAR_MAX
  LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_ALL LC_MESSAGES ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5
  ABDAY_6 ABDAY_7 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
  ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 MON_8 MON_9
  MON_10 MON_11 MON_12 AM_STR PM_STR D_T_FMT D_FMT T_FMT T_FMT_AMPM ERA ERA_YEAR ERA_D_T_FMT ERA_D_FMT ERA_T_FMT
  ALT_DIGITS INT_CURR_SYMBOL CURRENCY_SYMBOL CRNCYSTR MON_DECIMAL_POINT MON_THOUSANDS_SEP MON_GROUPING
  POSITIVE_SIGN NEGATIVE_SIGN INT_FRAC_DIGITS FRAC_DIGITS P_CS_PRECEDES P_SEP_BY_SPACE N_CS_PRECEDES
  N_SEP_BY_SPACE P_SIGN_POSN N_SIGN_POSN DECIMAL_POINT RADIXCHAR THOUSANDS_SEP THOUSEP GROUPING YESEXPR NOEXPR
  YESSTR NOSTR CODESET LOG_EMERG LOG_ALERT LOG_CRIT LOG_ERR LOG_WARNING LOG_NOTICE LOG_INFO LOG_DEBUG LOG_KERN
  LOG_USER LOG_MAIL LOG_DAEMON LOG_AUTH LOG_SYSLOG LOG_LPR LOG_NEWS LOG_UUCP LOG_CRON LOG_AUTHPRIV LOG_LOCAL0
  LOG_LOCAL1 LOG_LOCAL2 LOG_LOCAL3 LOG_LOCAL4 LOG_LOCAL5 LOG_LOCAL6 LOG_LOCAL7 LOG_PID LOG_CONS LOG_ODELAY
  LOG_NDELAY LOG_NOWAIT LOG_PERROR
]
|
||||
|
||||
# Predefined variables (classified as :predefined in VARIABLE_KIND).
PREDEFINED = %w[
  $GLOBALS $_SERVER $_GET $_POST $_FILES $_REQUEST $_SESSION $_ENV
  $_COOKIE $php_errormsg $HTTP_RAW_POST_DATA $http_response_header
  $argc $argv
]

# Identifier => token kind lookup, built on WordList::CaseIgnoring.
# :ident is the value handed to the constructor (presumably what unlisted
# words yield - confirm against WordList). Some words appear in several
# lists; which kind they end up with depends on WordList#add.
IDENT_KIND = WordList::CaseIgnoring.new(:ident).
  add(KEYWORDS, :keyword).
  add(TYPES, :predefined_type).
  add(LANGUAGE_CONSTRUCTS, :keyword).
  add(BUILTIN_FUNCTIONS, :predefined).
  add(CLASSES, :predefined_constant).
  add(EXCEPTIONS, :exception).
  add(CONSTANTS, :predefined_constant)

# Variable name (including the leading $) => token kind lookup, built on the
# plain WordList; :local_variable is the value handed to the constructor.
VARIABLE_KIND = WordList.new(:local_variable).
  add(PREDEFINED, :predefined)
|
||||
end
|
||||
|
||||
# Regular expressions shared by the PHP scanner.
module RE  # :nodoc:

  # Opening of a PHP section: a <script> tag whose language attribute is
  # "php" (either quote style), "<?php" with an optional digit, or a bare
  # "<?" that is not followed by "xml". Case-insensitive.
  PHP_START = /
    <script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
    <script\s+[^>]*?language\s*=\s*'php'[^>]*?> |
    <\?php\d? |
    <\?(?!xml)
  /xi

  # End of a PHP section: "</script>" or "?>". Case-insensitive.
  PHP_END = %r!
    </script> |
    \?>
  !xi

  # Markup whose presence makes scan_tokens start in HTML mode.
  HTML_INDICATOR = /<!DOCTYPE html|<(?:html|body|div|p)[> ]/i

  # Identifier: a letter, underscore or byte in \x7f-\xFF, followed by any
  # number of those or digits. Case-insensitive.
  IDENTIFIER = /[a-z_\x7f-\xFF][a-z0-9_\x7f-\xFF]*/i
  # A dollar sign followed by an identifier.
  VARIABLE = /\$#{IDENTIFIER}/

  OPERATOR = /
    \.(?!\d)=? |      # dot that is not decimal point, string concatenation
    && | \|\| |       # logic
    :: | -> | => |    # scope, member, dictionary
    \\(?!\n) |        # namespace
    \+\+ | -- |       # increment, decrement
    [,;?:()\[\]{}] |  # simple delimiters
    [-+*\/%&|^]=? |   # ordinary math, binary logic, assignment shortcuts
    [~$] |            # whatever
    =& |              # reference assignment
    [=!]=?=? | <> |   # comparison and assignment
    <<=? | >>=? | [<>]=?  # comparison and shift
  /x

end
|
||||
|
||||
protected
|
||||
|
||||
# Tokenizes PHP, optionally embedded in HTML, and sends the tokens to
# +encoder+. +options+ is not read here. Returns +encoder+.
#
# +states+ is a stack. Its top entry selects the lexer mode:
# :initial (HTML), :php, :sqstring, :dqstring, :class_expected or
# :function_expected. While a "{$...}" interpolation is being scanned, the
# string entry below the pushed :php entry is replaced by the pair
# [string_state, delimiter] so it can be restored at the closing brace.
#
# Every group that is still open when the input ends (unterminated string,
# heredoc or interpolation) is closed before returning, so the encoder
# always receives balanced begin_group / end_group calls.
def scan_tokens encoder, options

  if check(RE::PHP_START) ||  # starts with <?
   (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
   check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
   check(/.{1,100}#{RE::PHP_START}/om)  # PHP start after max 100 chars
    # is HTML with embedded PHP, so start with HTML
    states = [:initial]
  else
    # is just PHP, so start with PHP surrounded by HTML
    states = [:initial, :php]
  end

  label_expected = true
  case_expected = false

  heredoc_delimiter = nil  # Regexp matching the heredoc terminator while inside a heredoc
  delimiter = nil          # opening quote character of the current string
  modifier = nil           # pending "b" prefix of a binary string literal

  until eos?

    case states.last

    when :initial  # HTML
      if match = scan(RE::PHP_START)
        encoder.text_token match, :inline_delimiter
        label_expected = true
        states << :php
      else
        # everything up to the next PHP section goes to the HTML scanner
        match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
        @html_scanner.tokenize match unless match.empty?
      end

    when :php
      if match = scan(/\s+/)
        encoder.text_token match, :space

      elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
        encoder.text_token match, :comment

      elsif match = scan(RE::IDENTIFIER)
        kind = Words::IDENT_KIND[match]
        if kind == :ident && label_expected && check(/:(?!:)/)
          kind = :label
          label_expected = true
        else
          label_expected = false
          if kind == :ident && match =~ /^[A-Z]/
            kind = :constant
          elsif kind == :keyword
            case match
            when 'class'
              states << :class_expected
            when 'function'
              states << :function_expected
            when 'case', 'default'
              case_expected = true
            end
          elsif match == 'b' && check(/['"]/)  # binary string literal
            # emitted as :modifier inside the string group that follows
            modifier = match
            next
          end
        end
        encoder.text_token match, kind

      elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
        label_expected = false
        encoder.text_token match, :float

      elsif match = scan(/0x[0-9a-fA-F]+/)
        label_expected = false
        encoder.text_token match, :hex

      elsif match = scan(/\d+/)
        label_expected = false
        encoder.text_token match, :integer

      elsif match = scan(/['"`]/)
        encoder.begin_group :string
        if modifier
          encoder.text_token modifier, :modifier
          modifier = nil
        end
        delimiter = match
        encoder.text_token match, :delimiter
        states.push match == "'" ? :sqstring : :dqstring

      elsif match = scan(RE::VARIABLE)
        label_expected = false
        encoder.text_token match, Words::VARIABLE_KIND[match]

      elsif match = scan(/\{/)
        encoder.text_token match, :operator
        label_expected = true
        states.push :php

      elsif match = scan(/\}/)
        if states.size == 1
          encoder.text_token match, :error
        else
          states.pop
          if states.last.is_a?(::Array)
            # end of a "{$...}" interpolation: restore the string state
            delimiter = states.last[1]
            states[-1] = states.last[0]
            encoder.text_token match, :delimiter
            encoder.end_group :inline
          else
            encoder.text_token match, :operator
            label_expected = true
          end
        end

      elsif match = scan(/@/)
        label_expected = false
        encoder.text_token match, :exception

      elsif match = scan(RE::PHP_END)
        encoder.text_token match, :inline_delimiter
        states = [:initial]

      elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
        encoder.begin_group :string
        # warn 'heredoc in heredoc?' if heredoc_delimiter
        heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
        encoder.text_token match, :delimiter
        # the third capture is the single-quoted form, scanned like '...'
        states.push self[3] ? :sqstring : :dqstring
        heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/

      elsif match = scan(/#{RE::OPERATOR}/o)
        label_expected = match == ';'
        if case_expected
          label_expected = true if match == ':'
          case_expected = false
        end
        encoder.text_token match, :operator

      else
        encoder.text_token getch, :error

      end

    when :sqstring
      if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
        encoder.text_token match, :content
      elsif !heredoc_delimiter && match = scan(/'/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        delimiter = nil
        label_expected = false
        states.pop
      elsif heredoc_delimiter && match = scan(/\n/)
        if scan heredoc_delimiter
          encoder.text_token "\n", :content
          encoder.text_token matched, :delimiter
          encoder.end_group :string
          heredoc_delimiter = nil
          label_expected = false
          states.pop
        else
          encoder.text_token match, :content
        end
      elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
        encoder.text_token match, :char
      elsif match = scan(/\\./m)
        encoder.text_token match, :content
      elsif match = scan(/\\/)
        encoder.text_token match, :error
      else
        states.pop
      end

    when :dqstring
      if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
        encoder.text_token match, :content
      elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        delimiter = nil
        label_expected = false
        states.pop
      elsif heredoc_delimiter && match = scan(/\n/)
        if scan heredoc_delimiter
          encoder.text_token "\n", :content
          encoder.text_token matched, :delimiter
          encoder.end_group :string
          heredoc_delimiter = nil
          label_expected = false
          states.pop
        else
          encoder.text_token match, :content
        end
      elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
        encoder.text_token match, :char
      elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
        encoder.text_token match, :char
      elsif match = scan(/\\./m)
        encoder.text_token match, :content
      elsif match = scan(/\\/)
        encoder.text_token match, :error
      elsif match = scan(/#{RE::VARIABLE}/o)
        # simple interpolation: $var, $var[key] or $var->member
        if check(/\[#{RE::IDENTIFIER}\]/o)
          encoder.begin_group :inline
          encoder.text_token match, :local_variable
          encoder.text_token scan(/\[/), :operator
          encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
          encoder.text_token scan(/\]/), :operator
          encoder.end_group :inline
        elsif check(/\[/)
          match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
          encoder.text_token match, :error
        elsif check(/->#{RE::IDENTIFIER}/o)
          encoder.begin_group :inline
          encoder.text_token match, :local_variable
          encoder.text_token scan(/->/), :operator
          encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
          encoder.end_group :inline
        elsif check(/->/)
          match << scan(/->/)
          encoder.text_token match, :error
        else
          encoder.text_token match, :local_variable
        end
      elsif match = scan(/\{/)
        if check(/\$/)
          # "{$...}" interpolation: remember the string state and its
          # delimiter, then scan the expression as PHP code
          encoder.begin_group :inline
          states[-1] = [states.last, delimiter]
          delimiter = nil
          states.push :php
          encoder.text_token match, :delimiter
        else
          encoder.text_token match, :content
        end
      elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
        encoder.text_token match, :local_variable
      elsif match = scan(/\$/)
        encoder.text_token match, :content
      else
        states.pop
      end

    when :class_expected
      if match = scan(/\s+/)
        encoder.text_token match, :space
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        encoder.text_token match, :class
        states.pop
      else
        states.pop
      end

    when :function_expected
      if match = scan(/\s+/)
        encoder.text_token match, :space
      elsif match = scan(/&/)
        encoder.text_token match, :operator
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        encoder.text_token match, :function
        states.pop
      else
        states.pop
      end

    else
      raise_inspect 'Unknown state!', encoder, states
    end

  end

  # End of input: unwind the state stack, innermost first, and close every
  # group that is still open.
  while state = states.pop
    encoder.end_group :string if [:sqstring, :dqstring].include? state
    if state.is_a?(::Array)
      # suspended string with an open "{$...}" interpolation
      encoder.end_group :inline
      encoder.end_group :string if [:sqstring, :dqstring].include? state.first
    end
  end

  encoder
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -0,0 +1,287 @@
|
|||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
# Scanner for Python. Supports Python 3.
|
||||
#
|
||||
# Based on pygments' PythonLexer, see
|
||||
# http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
#
# The scanner is a small state machine (see scan_tokens). Besides the
# :initial state it has a :string state for string contents and the states
# :def_expected, :class_expected and :include_expected for the names that
# follow +def+, +class+ and +import+/+from+.
|
||||
class Python < Scanner
|
||||
|
||||
register_for :python
|
||||
file_extension 'py'
|
||||
|
||||
# Reserved words, highlighted as :keyword.
KEYWORDS = [
|
||||
'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
|
||||
'del', 'elif', 'else', 'except', 'finally', 'for',
|
||||
'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
|
||||
'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
|
||||
'nonlocal', # new in Python 3
|
||||
] # :nodoc:
|
||||
|
||||
# Words that were statements in Python 2. scan_tokens treats them as
# :ident when directly followed by "(" and as :keyword otherwise.
OLD_KEYWORDS = [
|
||||
'exec', 'print', # gone in Python 3
|
||||
] # :nodoc:
|
||||
|
||||
# Builtin functions and types, highlighted as :predefined.
PREDEFINED_METHODS_AND_TYPES = %w[
|
||||
__import__ abs all any apply basestring bin bool buffer
|
||||
bytearray bytes callable chr classmethod cmp coerce compile
|
||||
complex delattr dict dir divmod enumerate eval execfile exit
|
||||
file filter float frozenset getattr globals hasattr hash hex id
|
||||
input int intern isinstance issubclass iter len list locals
|
||||
long map max min next object oct open ord pow property range
|
||||
raw_input reduce reload repr reversed round set setattr slice
|
||||
sorted staticmethod str sum super tuple type unichr unicode
|
||||
vars xrange zip
|
||||
] # :nodoc:
|
||||
|
||||
# Builtin exception and warning names, highlighted as :exception.
PREDEFINED_EXCEPTIONS = %w[
|
||||
ArithmeticError AssertionError AttributeError
|
||||
BaseException DeprecationWarning EOFError EnvironmentError
|
||||
Exception FloatingPointError FutureWarning GeneratorExit IOError
|
||||
ImportError ImportWarning IndentationError IndexError KeyError
|
||||
KeyboardInterrupt LookupError MemoryError NameError
|
||||
NotImplemented NotImplementedError OSError OverflowError
|
||||
OverflowWarning PendingDeprecationWarning ReferenceError
|
||||
RuntimeError RuntimeWarning StandardError StopIteration
|
||||
SyntaxError SyntaxWarning SystemError SystemExit TabError
|
||||
TypeError UnboundLocalError UnicodeDecodeError
|
||||
UnicodeEncodeError UnicodeError UnicodeTranslateError
|
||||
UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
|
||||
] # :nodoc:
|
||||
|
||||
# Builtin constants and conventional names, highlighted as
# :predefined_constant.
PREDEFINED_VARIABLES_AND_CONSTANTS = [
|
||||
'False', 'True', 'None', # "keywords" since Python 3
|
||||
'self', 'Ellipsis', 'NotImplemented',
|
||||
] # :nodoc:
|
||||
|
||||
# Maps a name to its token kind.
# NOTE(review): words not listed presumably map to :ident (the argument
# given to WordList.new) - confirm against WordList.
IDENT_KIND = WordList.new(:ident).
|
||||
add(KEYWORDS, :keyword).
|
||||
add(OLD_KEYWORDS, :old_keyword).
|
||||
add(PREDEFINED_METHODS_AND_TYPES, :predefined).
|
||||
add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
|
||||
add(PREDEFINED_EXCEPTIONS, :exception) # :nodoc:
|
||||
|
||||
# An identifier: a word character that is not a digit, then word characters.
NAME = / [^\W\d] \w* /x # :nodoc:
|
||||
# What may follow a backslash in a non-raw string (without the backslash).
ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
|
||||
# \u, \U and \N{...} escapes (without the backslash).
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x # :nodoc:
|
||||
|
||||
OPERATOR = /
|
||||
\.\.\. | # ellipsis
|
||||
\.(?!\d) | # dot but not decimal point
|
||||
[,;:()\[\]{}] | # simple delimiters
|
||||
\/\/=? | \*\*=? | # special math
|
||||
[-+*\/%&|^]=? | # ordinary math and binary logic
|
||||
[~`] | # binary complement and inspection
|
||||
<<=? | >>=? | [<>=]=? | != # comparison and assignment
|
||||
/x # :nodoc:
|
||||
|
||||
# Cache: string delimiter => regexp matching exactly that delimiter.
STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter|
|
||||
h[delimiter] = Regexp.union delimiter # :nodoc:
|
||||
}
|
||||
|
||||
# Cache: string delimiter => regexp matching a run of plain string content,
# stopping before a backslash, the end of the line, or the delimiter.
STRING_CONTENT_REGEXP = Hash.new { |h, delimiter|
|
||||
h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x # :nodoc:
|
||||
}
|
||||
|
||||
# State to enter after a keyword has been scanned.
# NOTE(review): keywords not listed presumably yield :initial (the argument
# given to WordList.new) - confirm against WordList.
DEF_NEW_STATE = WordList.new(:initial).
|
||||
add(%w(def), :def_expected).
|
||||
add(%w(import from), :include_expected).
|
||||
add(%w(class), :class_expected) # :nodoc:
|
||||
|
||||
# A dotted name or "*", as used in import statements.
DESCRIPTOR = /
|
||||
#{NAME}
|
||||
(?: \. #{NAME} )*
|
||||
| \*
|
||||
/x # :nodoc:
|
||||
|
||||
# Optional indentation and u/r prefix followed by a triple quote; used to
# decide whether the next string is highlighted as a docstring.
DOCSTRING_COMING = /
|
||||
[ \t]* u?r? ("""|''')
|
||||
/x # :nodoc:
|
||||
|
||||
protected
|
||||
|
||||
# Tokenizes the input and sends the tokens to +encoder+.
#
# encoder:: receives the text_token / begin_group / end_group calls
# options:: not read by this method
#
# Returns +encoder+.
def scan_tokens encoder, options
|
||||
|
||||
state = :initial
|
||||
# the quote sequence that closes the string currently being scanned
string_delimiter = nil
|
||||
# true while inside a string with an "r" modifier; ESCAPE sequences are
# then not highlighted as :char
string_raw = false
|
||||
# group kind of the current string: :string or :docstring
string_type = nil
|
||||
# true when the next string literal should be treated as a docstring
docstring_coming = match?(/#{DOCSTRING_COMING}/o)
|
||||
# true when the previous token was "."; the following name is then :ident
last_token_dot = false
|
||||
# use the /u variants of the name regexps for UTF-8 input
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
|
||||
# import keywords seen so far (:from, :import, :as)
from_import_state = []
|
||||
|
||||
until eos?
|
||||
|
||||
if state == :string
|
||||
if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
|
||||
encoder.text_token match, :delimiter
|
||||
encoder.end_group string_type
|
||||
string_type = nil
|
||||
state = :initial
|
||||
next
|
||||
# only triple-quoted strings may span lines
elsif string_delimiter.size == 3 && match = scan(/\n/)
|
||||
encoder.text_token match, :content
|
||||
elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
|
||||
encoder.text_token match, :content
|
||||
elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
|
||||
encoder.text_token match, :char
|
||||
elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
|
||||
encoder.text_token match, :char
|
||||
elsif match = scan(/ \\ . /x)
|
||||
encoder.text_token match, :content
|
||||
# lone backslash or end of line inside the string: close the group and
# flag an error
elsif match = scan(/ \\ | $ /x)
|
||||
encoder.end_group string_type
|
||||
string_type = nil
|
||||
encoder.text_token match, :error
|
||||
state = :initial
|
||||
else
|
||||
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
|
||||
end
|
||||
|
||||
elsif match = scan(/ [ \t]+ | \\?\n /x)
|
||||
encoder.text_token match, :space
|
||||
if match == "\n"
|
||||
# an import statement ends with the line
state = :initial if state == :include_expected
|
||||
docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
|
||||
end
|
||||
next
|
||||
|
||||
elsif match = scan(/ \# [^\n]* /mx)
|
||||
encoder.text_token match, :comment
|
||||
next
|
||||
|
||||
elsif state == :initial
|
||||
|
||||
if match = scan(/#{OPERATOR}/o)
|
||||
encoder.text_token match, :operator
|
||||
|
||||
# string start: optional modifiers (group 1) and the delimiter (group 2)
elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
|
||||
string_delimiter = self[2]
|
||||
string_type = docstring_coming ? :docstring : :string
|
||||
docstring_coming = false if docstring_coming
|
||||
encoder.begin_group string_type
|
||||
string_raw = false
|
||||
modifiers = self[1]
|
||||
unless modifiers.empty?
|
||||
string_raw = !!modifiers.index(?r)
|
||||
encoder.text_token modifiers, :modifier
|
||||
match = string_delimiter
|
||||
end
|
||||
state = :string
|
||||
encoder.text_token match, :delimiter
|
||||
|
||||
# TODO: backticks
|
||||
|
||||
elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
||||
kind = IDENT_KIND[match]
|
||||
# TODO: keyword arguments
|
||||
kind = :ident if last_token_dot
|
||||
if kind == :old_keyword
|
||||
kind = check(/\(/) ? :ident : :keyword
|
||||
# a predefined name that is being assigned to is an ordinary identifier
elsif kind == :predefined && check(/ *=/)
|
||||
kind = :ident
|
||||
elsif kind == :keyword
|
||||
state = DEF_NEW_STATE[match]
|
||||
from_import_state << match.to_sym if state == :include_expected
|
||||
end
|
||||
encoder.text_token match, kind
|
||||
|
||||
elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
|
||||
encoder.text_token match, :decorator
|
||||
|
||||
elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
|
||||
encoder.text_token match, :hex
|
||||
|
||||
elsif match = scan(/0[bB][01]+[lL]?/)
|
||||
encoder.text_token match, :binary
|
||||
|
||||
elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
|
||||
# a trailing j/J turns the float into an imaginary literal
if scan(/[jJ]/)
|
||||
match << matched
|
||||
encoder.text_token match, :imaginary
|
||||
else
|
||||
encoder.text_token match, :float
|
||||
end
|
||||
|
||||
elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
|
||||
encoder.text_token match, :octal
|
||||
|
||||
elsif match = scan(/\d+([lL])?/)
|
||||
# j/J is only accepted when there is no l/L suffix
if self[1] == nil && scan(/[jJ]/)
|
||||
match << matched
|
||||
encoder.text_token match, :imaginary
|
||||
else
|
||||
encoder.text_token match, :integer
|
||||
end
|
||||
|
||||
else
|
||||
encoder.text_token getch, :error
|
||||
|
||||
end
|
||||
|
||||
elsif state == :def_expected
|
||||
state = :initial
|
||||
if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
||||
encoder.text_token match, :method
|
||||
else
|
||||
next
|
||||
end
|
||||
|
||||
elsif state == :class_expected
|
||||
state = :initial
|
||||
if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
||||
encoder.text_token match, :class
|
||||
else
|
||||
next
|
||||
end
|
||||
|
||||
elsif state == :include_expected
|
||||
if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
|
||||
if match == 'as'
|
||||
encoder.text_token match, :keyword
|
||||
from_import_state << :as
|
||||
elsif from_import_state.first == :from && match == 'import'
|
||||
encoder.text_token match, :keyword
|
||||
from_import_state << :import
|
||||
# the name after "as" is the local alias
elsif from_import_state.last == :as
|
||||
# encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
|
||||
encoder.text_token match, :ident
|
||||
from_import_state.pop
|
||||
# any other keyword ends the import; rescan it in the :initial state
elsif IDENT_KIND[match] == :keyword
|
||||
unscan
|
||||
match = nil
|
||||
state = :initial
|
||||
next
|
||||
else
|
||||
encoder.text_token match, :include
|
||||
end
|
||||
elsif match = scan(/,/)
|
||||
from_import_state.pop if from_import_state.last == :as
|
||||
encoder.text_token match, :operator
|
||||
else
|
||||
from_import_state = []
|
||||
state = :initial
|
||||
next
|
||||
end
|
||||
|
||||
else
|
||||
raise_inspect 'Unknown state', encoder, state
|
||||
|
||||
end
|
||||
|
||||
last_token_dot = match == '.'
|
||||
|
||||
end
|
||||
|
||||
# input ended inside a string: close the open group
if state == :string
|
||||
encoder.end_group string_type
|
||||
end
|
||||
|
||||
encoder
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -0,0 +1,66 @@
|
|||
module CodeRay
module Scanners
  
  # = Debug Scanner
  # 
  # Parses the output of the Encoders::Debug encoder.
  # 
  # Two constructs are recognized:
  # 
  # * <tt>kind(text)</tt> - a single token; +text+ may contain
  #   backslash-escaped characters (for example <tt>\)</tt>).
  # * <tt>kind<</tt> ... <tt>></tt> and <tt>kind[</tt> ... <tt>]</tt> -
  #   a group of tokens.
  class Raydebug < Scanner
    
    register_for :raydebug
    file_extension 'raydebug'
    title 'CodeRay Token Dump'
    
  protected
    
    # Tokenizes the dump and sends the tokens to +encoder+.
    # 
    # encoder:: receives the text_token / begin_group / end_group calls
    # options:: not read by this method
    # 
    # Groups that are still open at the end of the input are closed.
    # Returns +encoder+.
    def scan_tokens encoder, options
      
      # kinds of the groups that are currently open, innermost last
      opened_tokens = []
      
      until eos?
        
        if match = scan(/\s+/)
          encoder.text_token match, :space
          
        # kind(content - the closing parenthesis is scanned separately
        # because it may be missing
        elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) /x)
          kind = self[1]
          encoder.text_token kind, :class
          encoder.text_token '(', :operator
          match = self[2]
          # "kind()" has no content; do not emit a zero-length token
          encoder.text_token match, kind.to_sym unless match.empty?
          encoder.text_token match, :operator if match = scan(/\)/)
          
        # kind< or kind[ opens a group; the regexp admits no other opener
        elsif match = scan(/ (\w+) ([<\[]) /x)
          kind = self[1]
          encoder.text_token kind, :class
          kind = kind.to_sym
          opened_tokens << kind
          encoder.begin_group kind
          encoder.text_token self[2], :operator
          
        # > or ] closes the innermost open group
        elsif !opened_tokens.empty? && match = scan(/ [>\]] /x)
          encoder.text_token match, :operator
          encoder.end_group opened_tokens.pop
          
        else
          encoder.text_token getch, :space
          
        end
        
      end
      
      encoder.end_group opened_tokens.pop until opened_tokens.empty?
      
      encoder
    end
    
  end
  
end
end
|
|
@ -0,0 +1,461 @@
|
|||
module CodeRay
|
||||
module Scanners
|
||||
|
||||
# This scanner is really complex, since Ruby _is_ a complex language!
|
||||
#
|
||||
# It tries to highlight 100% of all common code,
|
||||
# and 90% of strange codes.
|
||||
#
|
||||
# It is optimized for HTML highlighting, and is not very useful for
|
||||
# parsing or pretty printing.
#
# While scanning, the state is either a Symbol (:initial, :def_expected,
# :dot_expected, :module_expected, :undef_expected, :undef_comma_expected,
# :alias_expected) or a StringState object describing the string, regexp,
# symbol, shell command or heredoc that is currently being read.
|
||||
class Ruby < Scanner
|
||||
|
||||
register_for :ruby
|
||||
file_extension 'rb'
|
||||
|
||||
autoload :Patterns, 'coderay/scanners/ruby/patterns'
|
||||
autoload :StringState, 'coderay/scanners/ruby/string_state'
|
||||
|
||||
# Returns a new StringState for an interpreted (double-quoted) string.
def interpreted_string_state
|
||||
StringState.new :string, true, '"'
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
# Sets the state that scan_tokens starts from when options[:state] is
# not given.
def setup
|
||||
@state = :initial
|
||||
end
|
||||
|
||||
# Tokenizes the input and sends the tokens to +encoder+.
#
# encoder:: receives the text_token / begin_group / end_group calls
# options:: :state sets the start state (instead of @state);
# :keep_state stores the final state and pending heredocs in @state
#
# Returns +encoder+.
def scan_tokens encoder, options
|
||||
# the stored state is either a single state or a [state, heredocs] pair
# (see the :keep_state handling at the end of this method)
state, heredocs = options[:state] || @state
|
||||
heredocs = heredocs.dup if heredocs.is_a?(Array)
|
||||
|
||||
# resuming inside a string: reopen its group
if state && state.instance_of?(StringState)
|
||||
encoder.begin_group state.type
|
||||
end
|
||||
|
||||
# state to return to after one pass through the :initial branch
last_state = nil
|
||||
|
||||
# false, or the "." / "::" that was just scanned; the next token is then
# scanned as a method name
method_call_expected = false
|
||||
# whether a value may start here; decides e.g. between a regexp and a
# division when "/" is seen
value_expected = true
|
||||
|
||||
# saved [state, curly depth, heredocs] for each open #{...} block
inline_block_stack = nil
|
||||
inline_block_curly_depth = 0
|
||||
|
||||
# resuming with pending heredocs: continue with the first one
if heredocs
|
||||
state = heredocs.shift
|
||||
encoder.begin_group state.type
|
||||
heredocs = nil if heredocs.empty?
|
||||
end
|
||||
|
||||
# def_object_stack = nil
|
||||
# def_object_paren_depth = 0
|
||||
|
||||
patterns = Patterns # avoid constant lookup
|
||||
|
||||
# use the /u variants of the regexps for UTF-8 input
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
|
||||
|
||||
until eos?
|
||||
|
||||
if state.instance_of? ::Symbol
|
||||
|
||||
if match = scan(/[ \t\f\v]+/)
|
||||
encoder.text_token match, :space
|
||||
|
||||
elsif match = scan(/\n/)
|
||||
# heredoc bodies start on the line after the one that opened them
if heredocs
|
||||
unscan # heredoc scanning needs \n at start
|
||||
state = heredocs.shift
|
||||
encoder.begin_group state.type
|
||||
heredocs = nil if heredocs.empty?
|
||||
else
|
||||
state = :initial if state == :undef_comma_expected
|
||||
encoder.text_token match, :space
|
||||
value_expected = true
|
||||
end
|
||||
|
||||
# comments; at the beginning of a line also "#!" lines (kind :doctype)
# and whatever patterns::RUBYDOC_OR_DATA matches
elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
|
||||
encoder.text_token match, self[1] ? :doctype : :comment
|
||||
|
||||
# backslash followed by a newline
elsif match = scan(/\\\n/)
|
||||
if heredocs
|
||||
unscan # heredoc scanning needs \n at start
|
||||
encoder.text_token scan(/\\/), :space
|
||||
state = heredocs.shift
|
||||
encoder.begin_group state.type
|
||||
heredocs = nil if heredocs.empty?
|
||||
else
|
||||
encoder.text_token match, :space
|
||||
end
|
||||
|
||||
elsif state == :initial
|
||||
|
||||
# IDENTS #
|
||||
if !method_call_expected &&
|
||||
match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
|
||||
/#{patterns::METHOD_NAME}/o)
|
||||
value_expected = false
|
||||
kind = patterns::IDENT_KIND[match]
|
||||
if kind == :ident
|
||||
# capitalized names are constants, unless they end in ! or ? or are
# directly followed by "("
if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
|
||||
kind = :constant
|
||||
end
|
||||
elsif kind == :keyword
|
||||
state = patterns::KEYWORD_NEW_STATE[match]
|
||||
value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
|
||||
end
|
||||
value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
|
||||
encoder.text_token match, kind
|
||||
|
||||
elsif method_call_expected &&
|
||||
match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
|
||||
/#{patterns::METHOD_AFTER_DOT}/o)
|
||||
if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
|
||||
encoder.text_token match, :constant
|
||||
else
|
||||
encoder.text_token match, :ident
|
||||
end
|
||||
method_call_expected = false
|
||||
value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
|
||||
|
||||
# OPERATORS #
# group 1 is set for "." and "::" (a method name follows); the empty
# group 2 is set for operators after which a value is expected
elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
|
||||
method_call_expected = self[1]
|
||||
value_expected = !method_call_expected && self[2]
|
||||
# inside #{...}: count curly braces to find the brace that closes it
if inline_block_stack
|
||||
case match
|
||||
when '{'
|
||||
inline_block_curly_depth += 1
|
||||
when '}'
|
||||
inline_block_curly_depth -= 1
|
||||
if inline_block_curly_depth == 0 # closing brace of inline block reached
|
||||
state, inline_block_curly_depth, heredocs = inline_block_stack.pop
|
||||
inline_block_stack = nil if inline_block_stack.empty?
|
||||
heredocs = nil if heredocs && heredocs.empty?
|
||||
encoder.text_token match, :inline_delimiter
|
||||
encoder.end_group :inline
|
||||
next
|
||||
end
|
||||
end
|
||||
end
|
||||
encoder.text_token match, :operator
|
||||
|
||||
elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
|
||||
/#{patterns::SYMBOL}/o)
|
||||
# a quote after the colon starts a quoted symbol, scanned like a string
case delim = match[1]
|
||||
when ?', ?"
|
||||
encoder.begin_group :symbol
|
||||
encoder.text_token ':', :symbol
|
||||
match = delim.chr
|
||||
encoder.text_token match, :delimiter
|
||||
state = self.class::StringState.new :symbol, delim == ?", match
|
||||
else
|
||||
encoder.text_token match, :symbol
|
||||
value_expected = false
|
||||
end
|
||||
|
||||
# quoted strings; simple ones (no backslash, and no "#" in double quotes)
# are matched completely in one step
elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
|
||||
encoder.begin_group :string
|
||||
if match.size == 1
|
||||
encoder.text_token match, :delimiter
|
||||
state = self.class::StringState.new :string, match == '"', match # important for streaming
|
||||
else
|
||||
encoder.text_token match[0,1], :delimiter
|
||||
encoder.text_token match[1..-2], :content if match.size > 2
|
||||
encoder.text_token match[-1,1], :delimiter
|
||||
encoder.end_group :string
|
||||
value_expected = false
|
||||
end
|
||||
|
||||
elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
|
||||
/#{patterns::INSTANCE_VARIABLE}/o)
|
||||
value_expected = false
|
||||
encoder.text_token match, :instance_variable
|
||||
|
||||
# "/" starts a regexp only where a value is expected
elsif value_expected && match = scan(/\//)
|
||||
encoder.begin_group :regexp
|
||||
encoder.text_token match, :delimiter
|
||||
state = self.class::StringState.new :regexp, true, '/'
|
||||
|
||||
# a sign belongs to the number only where a value is expected
elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
|
||||
if method_call_expected
|
||||
encoder.text_token match, :error
|
||||
method_call_expected = false
|
||||
else
|
||||
encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
|
||||
end
|
||||
value_expected = false
|
||||
|
||||
elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
|
||||
value_expected = true
|
||||
encoder.text_token match, :operator
|
||||
|
||||
# heredoc opener: only the opener is emitted here; the body is scanned
# after the next newline (see the handling of \n above)
elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
|
||||
quote = self[3]
|
||||
delim = self[quote ? 4 : 2]
|
||||
kind = patterns::QUOTE_TO_TYPE[quote]
|
||||
encoder.begin_group kind
|
||||
encoder.text_token match, :delimiter
|
||||
encoder.end_group kind
|
||||
heredocs ||= [] # create heredocs if empty
|
||||
heredocs << self.class::StringState.new(kind, quote != "'", delim,
|
||||
self[1] == '-' ? :indented : :linestart)
|
||||
value_expected = false
|
||||
|
||||
elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
|
||||
kind = patterns::FANCY_STRING_KIND[self[1]]
|
||||
encoder.begin_group kind
|
||||
state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
|
||||
encoder.text_token match, :delimiter
|
||||
|
||||
elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
|
||||
value_expected = false
|
||||
encoder.text_token match, :integer
|
||||
|
||||
elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
|
||||
value_expected = true
|
||||
encoder.text_token match, :operator
|
||||
|
||||
elsif match = scan(/`/)
|
||||
encoder.begin_group :shell
|
||||
encoder.text_token match, :delimiter
|
||||
state = self.class::StringState.new :shell, true, match
|
||||
|
||||
elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
|
||||
/#{patterns::GLOBAL_VARIABLE}/o)
|
||||
encoder.text_token match, :global_variable
|
||||
value_expected = false
|
||||
|
||||
elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
|
||||
/#{patterns::CLASS_VARIABLE}/o)
|
||||
encoder.text_token match, :class_variable
|
||||
value_expected = false
|
||||
|
||||
# a backslash as the very last character of the input
elsif match = scan(/\\\z/)
|
||||
encoder.text_token match, :space
|
||||
|
||||
else
|
||||
# nothing usable followed the "." / "::": retry as ordinary code
if method_call_expected
|
||||
method_call_expected = false
|
||||
next
|
||||
end
|
||||
unless unicode
|
||||
# check for unicode
|
||||
# $DEBUG is switched off while probing and restored in the ensure clause
$DEBUG_BEFORE, $DEBUG = $DEBUG, false
|
||||
begin
|
||||
if check(/./mu).size > 1
|
||||
# seems like we should try again with unicode
|
||||
unicode = true
|
||||
end
|
||||
rescue
|
||||
# bad unicode char; use getch
|
||||
ensure
|
||||
$DEBUG = $DEBUG_BEFORE
|
||||
end
|
||||
next if unicode
|
||||
end
|
||||
|
||||
encoder.text_token getch, :error
|
||||
|
||||
end
|
||||
|
||||
# :initial was entered for one step only: go back to the saved state
if last_state
|
||||
state = last_state
|
||||
last_state = nil
|
||||
end
|
||||
|
||||
elsif state == :def_expected
|
||||
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
|
||||
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
|
||||
encoder.text_token match, :method
|
||||
state = :initial
|
||||
else
|
||||
# scan one token in :initial, then expect "." or "::"
last_state = :dot_expected
|
||||
state = :initial
|
||||
end
|
||||
|
||||
elsif state == :dot_expected
|
||||
if match = scan(/\.|::/)
|
||||
# invalid definition
|
||||
state = :def_expected
|
||||
encoder.text_token match, :operator
|
||||
else
|
||||
state = :initial
|
||||
end
|
||||
|
||||
elsif state == :module_expected
|
||||
if match = scan(/<</)
|
||||
encoder.text_token match, :operator
|
||||
else
|
||||
state = :initial
|
||||
if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
|
||||
/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
|
||||
encoder.text_token match, :class
|
||||
end
|
||||
end
|
||||
|
||||
elsif state == :undef_expected
|
||||
state = :undef_comma_expected
|
||||
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
|
||||
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
|
||||
encoder.text_token match, :method
|
||||
elsif match = scan(/#{patterns::SYMBOL}/o)
|
||||
case delim = match[1]
|
||||
when ?', ?"
|
||||
encoder.begin_group :symbol
|
||||
encoder.text_token ':', :symbol
|
||||
match = delim.chr
|
||||
encoder.text_token match, :delimiter
|
||||
state = self.class::StringState.new :symbol, delim == ?", match
|
||||
# continue the undef list after the quoted symbol is closed
state.next_state = :undef_comma_expected
|
||||
else
|
||||
encoder.text_token match, :symbol
|
||||
end
|
||||
else
|
||||
state = :initial
|
||||
end
|
||||
|
||||
elsif state == :undef_comma_expected
|
||||
if match = scan(/,/)
|
||||
encoder.text_token match, :operator
|
||||
state = :undef_expected
|
||||
else
|
||||
state = :initial
|
||||
end
|
||||
|
||||
elsif state == :alias_expected
|
||||
# both names of the alias and the space between them are matched at once
match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
|
||||
/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
|
||||
|
||||
if match
|
||||
encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
|
||||
encoder.text_token self[2], :space
|
||||
encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
|
||||
end
|
||||
state = :initial
|
||||
|
||||
else
|
||||
#:nocov:
|
||||
raise_inspect 'Unknown state: %p' % [state], encoder
|
||||
#:nocov:
|
||||
end
|
||||
|
||||
else # StringState
|
||||
|
||||
# plain content up to the next position state.pattern matches, or the
# rest of the input if it never matches
match = scan_until(state.pattern) || scan_rest
|
||||
unless match.empty?
|
||||
encoder.text_token match, :content
|
||||
break if eos?
|
||||
end
|
||||
|
||||
if state.heredoc && self[1] # end of heredoc
|
||||
match = getch
|
||||
match << scan_until(/$/) unless eos?
|
||||
encoder.text_token match, :delimiter unless match.empty?
|
||||
encoder.end_group state.type
|
||||
state = state.next_state
|
||||
next
|
||||
end
|
||||
|
||||
case match = getch
|
||||
|
||||
when state.delim
|
||||
# a closing delimiter that only balances a nested opening one is content
if state.paren_depth
|
||||
state.paren_depth -= 1
|
||||
if state.paren_depth > 0
|
||||
encoder.text_token match, :content
|
||||
next
|
||||
end
|
||||
end
|
||||
encoder.text_token match, :delimiter
|
||||
if state.type == :regexp && !eos?
|
||||
match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
|
||||
encoder.text_token match, :modifier unless match.empty?
|
||||
end
|
||||
encoder.end_group state.type
|
||||
value_expected = false
|
||||
state = state.next_state
|
||||
|
||||
when '\\'
|
||||
if state.interpreted
|
||||
if esc = scan(/#{patterns::ESCAPE}/o)
|
||||
encoder.text_token match + esc, :char
|
||||
else
|
||||
encoder.text_token match, :error
|
||||
end
|
||||
else
|
||||
# not interpreted: only the delimiter and the backslash can be escaped
case esc = getch
|
||||
when nil
|
||||
encoder.text_token match, :content
|
||||
when state.delim, '\\'
|
||||
encoder.text_token match + esc, :char
|
||||
else
|
||||
encoder.text_token match + esc, :content
|
||||
end
|
||||
end
|
||||
|
||||
when '#'
|
||||
case peek(1)
|
||||
when '{'
|
||||
# #{ opens an inline code block: save the string state and scan code
inline_block_stack ||= []
|
||||
inline_block_stack << [state, inline_block_curly_depth, heredocs]
|
||||
value_expected = true
|
||||
state = :initial
|
||||
inline_block_curly_depth = 1
|
||||
encoder.begin_group :inline
|
||||
encoder.text_token match + getch, :inline_delimiter
|
||||
when '$', '@'
|
||||
# #$var / #@var: scan one token as code, then return to the string
encoder.text_token match, :escape
|
||||
last_state = state
|
||||
state = :initial
|
||||
else
|
||||
#:nocov:
|
||||
raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
|
||||
#:nocov:
|
||||
end
|
||||
|
||||
when state.opening_paren
|
||||
state.paren_depth += 1
|
||||
encoder.text_token match, :content
|
||||
|
||||
else
|
||||
#:nocov:
|
||||
raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
|
||||
#:nocov:
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
# cleaning up
|
||||
if state.is_a? StringState
|
||||
encoder.end_group state.type
|
||||
end
|
||||
|
||||
if options[:keep_state]
|
||||
# an unfinished heredoc is stored with the pending heredocs
if state.is_a?(StringState) && state.heredoc
|
||||
(heredocs ||= []).unshift state
|
||||
state = :initial
|
||||
elsif heredocs && heredocs.empty?
|
||||
heredocs = nil
|
||||
end
|
||||
@state = state, heredocs
|
||||
end
|
||||
|
||||
# close the groups of inline blocks (and their strings) that are still open
if inline_block_stack
|
||||
until inline_block_stack.empty?
|
||||
state, = *inline_block_stack.pop
|
||||
encoder.end_group :inline
|
||||
encoder.end_group state.type
|
||||
end
|
||||
end
|
||||
|
||||
encoder
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
|
@ -0,0 +1,174 @@
|
|||
module CodeRay module Scanners
  
  # Scanner for SQL.
  # 
  # Recognizes comments (<tt>--</tt>, <tt>#</tt> and <tt>/* */</tt>), quoted
  # strings and identifiers (<tt>'</tt>, <tt>"</tt> and <tt>`</tt>, with an
  # optional prefix), <tt>@variables</tt>, numbers and operators, and
  # classifies words case-insensitively with IDENT_KIND.
  # 
  # by Josh Goebel
  class SQL < Scanner
    
    register_for :sql
    
    KEYWORDS = %w(
      all and any as before begin between by case check collate
      each else end exists
      for foreign from full group having if in inner is join
      like not of on or order outer over references
      then to union using values when where
      left right distinct
    )
    
    OBJECTS = %w(
      database databases table tables column columns fields index constraint
      constraints transaction function procedure row key view trigger
    )
    
    COMMANDS = %w(
      add alter comment create delete drop grant insert into select update set
      show prompt begin commit rollback replace truncate
    )
    
    PREDEFINED_TYPES = %w(
      char varchar varchar2 enum binary text tinytext mediumtext
      longtext blob tinyblob mediumblob longblob timestamp
      date time datetime year double decimal float int
      integer tinyint mediumint bigint smallint unsigned bit
      bool boolean hex bin oct
    )
    
    PREDEFINED_FUNCTIONS = %w( sum cast substring abs pi count min max avg now )
    
    DIRECTIVES = %w(
      auto_increment unique default charset initially deferred
      deferrable cascade immediate read write asc desc after
      primary foreign return engine
    )
    
    PREDEFINED_CONSTANTS = %w( null true false )
    
    # Maps a word (ignoring case) to its token kind.
    IDENT_KIND = WordList::CaseIgnoring.new(:ident).
      add(KEYWORDS, :keyword).
      add(OBJECTS, :type).
      add(COMMANDS, :class).
      add(PREDEFINED_TYPES, :predefined_type).
      add(PREDEFINED_CONSTANTS, :predefined_constant).
      add(PREDEFINED_FUNCTIONS, :predefined).
      add(DIRECTIVES, :directive)
    
    # What may follow a backslash inside a string (without the backslash).
    ESCAPE = / [rbfntv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
    
    # Prefixes that may directly precede an opening quote.
    STRING_PREFIXES = /[xnb]|_\w+/i
    
    # Tokenizes the input and sends the tokens to +encoder+.
    # 
    # encoder:: receives the text_token / begin_group / end_group calls
    # options:: not read by this method
    # 
    # Every :string group that is opened is closed again, also when the
    # string ends in a stray backslash or the input ends inside the string.
    # Returns +encoder+.
    def scan_tokens encoder, options
      
      state = :initial
      # the quote character that opened (and closes) the current string
      string_type = nil
      # string content collected so far; emitted as one :content token
      string_content = ''
      # true after a "." that is followed by a name: that name is a plain
      # :ident even if it is a known word
      name_expected = false
      
      until eos?
        
        if state == :initial
          
          if match = scan(/ \s+ | \\\n /x)
            encoder.text_token match, :space
          
          elsif match = scan(/(?:--\s?|#).*/)
            encoder.text_token match, :comment
            
          # /* ... */ comments; /*! ... */ is highlighted as a directive
          elsif match = scan(%r( /\* (!)? (?: .*? \*/ | .* ) )mx)
            encoder.text_token match, self[1] ? :directive : :comment
            
          elsif match = scan(/ [*\/=<>:;,!&^|()\[\]{}~%] | [-+\.](?!\d) /x)
            name_expected = true if match == '.' && check(/[A-Za-z_]/)
            encoder.text_token match, :operator
            
          elsif match = scan(/(#{STRING_PREFIXES})?([`"'])/o)
            prefix = self[1]
            string_type = self[2]
            encoder.begin_group :string
            encoder.text_token prefix, :modifier if prefix
            match = string_type
            state = :string
            encoder.text_token match, :delimiter
            
          elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
            encoder.text_token match, name_expected ? :ident : (match[0] == ?@ ? :variable : IDENT_KIND[match])
            name_expected = false
            
          elsif match = scan(/0[xX][0-9A-Fa-f]+/)
            encoder.text_token match, :hex
            
          elsif match = scan(/0[0-7]+(?![89.eEfF])/)
            encoder.text_token match, :octal
            
          elsif match = scan(/[-+]?(?>\d+)(?![.eEfF])/)
            encoder.text_token match, :integer
            
          elsif match = scan(/[-+]?(?:\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+)/)
            encoder.text_token match, :float
          
          elsif match = scan(/\\N/)
            encoder.text_token match, :predefined_constant
            
          else
            encoder.text_token getch, :error
            
          end
          
        elsif state == :string
          if match = scan(/[^\\"'`]+/)
            string_content << match
            next
          elsif match = scan(/["'`]/)
            if string_type == match
              if peek(1) == string_type  # doubling means escape
                string_content << string_type << getch
                next
              end
              unless string_content.empty?
                encoder.text_token string_content, :content
                string_content = ''
              end
              encoder.text_token match, :delimiter
              encoder.end_group :string
              state = :initial
              string_type = nil
            else
              # a different quote character is ordinary content
              string_content << match
            end
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            unless string_content.empty?
              encoder.text_token string_content, :content
              string_content = ''
            end
            encoder.text_token match, :char
          elsif match = scan(/ \\ . /mox)
            string_content << match
            next
          elsif match = scan(/ \\ | $ /x)
            unless string_content.empty?
              encoder.text_token string_content, :content
              string_content = ''
            end
            # "$" can match the empty string; do not emit a zero-length token
            encoder.text_token match, :error unless match.empty?
            # the string is abandoned here, so its group has to be closed
            encoder.end_group :string
            state = :initial
            string_type = nil
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
          end
          
        else
          raise_inspect 'else-case reached', encoder, state
          
        end
        
      end
      
      # input ended inside a string: emit what was collected, then close
      if state == :string
        encoder.text_token string_content, :content unless string_content.empty?
        encoder.end_group :string
      end
      
      encoder
      
    end
    
  end
  
end end
|
|
@ -0,0 +1,26 @@
|
|||
module CodeRay module Scanners
  
  # Plain text scanner.
  # 
  # Does no analysis at all: the complete input becomes a single token
  # of the kind :plain.
  # 
  # Alias: +plaintext+, +plain+
  class Text < Scanner
    
    register_for :text
    title 'Plain text'
    
    KINDS_NOT_LOC = [:plain]  # :nodoc:
    
  protected
    
    # Sends the whole input string to +encoder+ as one :plain token and
    # returns +encoder+. +options+ is not used.
    def scan_tokens(encoder, options)
      encoder.text_token(string, :plain)
      encoder
    end
    
  end
  
end end
|
|
@ -0,0 +1,17 @@
|
|||
module CodeRay module Scanners
  
  # The HTML scanner has to be loaded before it can be subclassed.
  load :html
  
  # XML scanner.
  # 
  # For now it adds nothing to Scanners::HTML: it only registers that
  # scanner under the name :xml and for the file extension "xml".
  class XML < HTML
    
    register_for :xml
    file_extension 'xml'
    
  end
  
end end
|
|
@ -0,0 +1,140 @@
|
|||
module CodeRay
module Scanners

  # Scanner for YAML.
  #
  # Based on the YAML scanner from Syntax by Jamis Buck.
  #
  # The scanner is a small state machine with three states:
  #
  # [:initial] start of a line or entry; a mapping key may follow
  # [:colon]   a key has just been scanned; a ":" (or "-") is expected
  # [:value]   a scalar value may follow
  #
  # A run of newlines always resets the state to :initial.
  class YAML < Scanner

    register_for :yaml
    file_extension 'yml'

    KINDS_NOT_LOC = :all

  protected

    # Tokenizes the source and feeds the tokens to +encoder+.
    #
    # encoder:: receives +text_token+, +begin_group+ and +end_group+ calls
    # options:: accepted for interface compatibility; not used here
    #
    # Returns +encoder+.
    #
    # The <tt>case ... end</tt> expressions used as +elsif+ conditions have
    # no subject: a +when+ branch that matches emits its tokens and jumps to
    # the next loop iteration with +next+. If no branch matches, the +case+
    # evaluates to +nil+ and control falls through to the following +elsif+.
    #
    # Empty :content tokens are never emitted (for example for <tt>""</tt>
    # values or <tt>"":</tt> keys); only the delimiters are sent then.
    def scan_tokens encoder, options

      state = :initial
      key_indent = string_indent = 0

      until eos?

        # The indentation of the previous key is only valid on its own line.
        key_indent = nil if bol?

        if match = scan(/ +[\t ]*/)
          encoder.text_token match, :space

        elsif match = scan(/\n+/)
          encoder.text_token match, :space
          state = :initial if match.index(?\n)

        elsif match = scan(/#.*/)
          encoder.text_token match, :comment

        # Document markers and directives are only recognized at line start.
        elsif bol? and case
          when match = scan(/---|\.\.\./)
            encoder.begin_group :head
            encoder.text_token match, :head
            encoder.end_group :head
            next
          when match = scan(/%.*/)
            encoder.text_token match, :doctype
            next
          end

        elsif state == :value and case
          # Double-quoted scalar, unless it is actually a quoted key ("...":).
          when !check(/(?:"[^"]*")(?=: |:$)/) && match = scan(/"/)
            encoder.begin_group :string
            encoder.text_token match, :delimiter
            # The content pattern also matches the empty string (as in "");
            # skip the token in that case instead of emitting an empty one.
            content = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)
            encoder.text_token content, :content unless content.nil? || content.empty?
            encoder.text_token match, :delimiter if match = scan(/"/)
            encoder.end_group :string
            next
          # Block scalar indicator (| or >) with optional chomping indicator.
          when match = scan(/[|>][-+]?/)
            encoder.begin_group :string
            encoder.text_token match, :delimiter
            # Following lines indented deeper than the key (or, without a key
            # on this line, deeper than the indicator) belong to the scalar.
            string_indent = key_indent || column(pos - match.size) - 1
            encoder.text_token matched, :content if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
            encoder.end_group :string
            next
          # Plain scalar up to the end of the line or a trailing comment,
          # plus any more deeply indented continuation lines.
          when match = scan(/(?![!"*&]).+?(?=$|\s+#)/)
            encoder.begin_group :string
            encoder.text_token match, :content
            string_indent = key_indent || column(pos - match.size) - 1
            encoder.text_token matched, :content if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
            encoder.end_group :string
            next
          end

        elsif case
          # Mapping colon or sequence dash, followed by a space or line end.
          when match = scan(/[-:](?= |$)/)
            state = :value if state == :colon && (match == ':' || match == '-')
            state = :value if state == :initial && match == '-'
            encoder.text_token match, :operator
            next
          # Flow collection punctuation.
          when match = scan(/[,{}\[\]]/)
            encoder.text_token match, :operator
            next
          # Unquoted mapping key.
          when state == :initial && match = scan(/[\w.() ]*\S(?= *:(?: |$))/)
            encoder.text_token match, :key
            key_indent = column(pos - match.size) - 1
            state = :colon
            next
          # Quoted mapping key (single or double quotes).
          when match = scan(/(?:"[^"\n]*"|'[^'\n]*')(?= *:(?: |$))/)
            encoder.begin_group :key
            encoder.text_token match[0,1], :delimiter
            # A key consisting of just the two quotes has no content.
            encoder.text_token match[1..-2], :content if match.size > 2
            encoder.text_token match[-1,1], :delimiter
            encoder.end_group :key
            key_indent = column(pos - match.size) - 1
            state = :colon
            next
          # Tag such as !ruby/object, optionally followed by :ClassName.
          when match = scan(/(![\w\/]+)(:([\w:]+))?/)
            encoder.text_token self[1], :type
            if self[2]
              encoder.text_token ':', :operator
              encoder.text_token self[3], :class
            end
            next
          # Anchor definition.
          when match = scan(/&\S+/)
            encoder.text_token match, :variable
            next
          # Alias reference.
          when match = scan(/\*\w+/)
            encoder.text_token match, :global_variable
            next
          # Merge key indicator.
          when match = scan(/<</)
            encoder.text_token match, :class_variable
            next
          # Time of day; emitted with the :octal token kind.
          when match = scan(/\d\d:\d\d:\d\d/)
            encoder.text_token match, :octal
            next
          # Full timestamp with zone offset; also emitted as :octal.
          when match = scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
            encoder.text_token match, :octal
            next
          when match = scan(/:\w+/)
            encoder.text_token match, :symbol
            next
          # Anything else that is not whitespace is reported as an error,
          # either up to the end of the line or just the current word.
          when match = scan(/[^:\s]+(:(?! |$)[^:\s]*)* .*/)
            encoder.text_token match, :error
            next
          when match = scan(/[^:\s]+(:(?! |$)[^:\s]*)*/)
            encoder.text_token match, :error
            next
          end

        else
          # Nothing matched: consume one character as an error so that the
          # loop always makes progress. The loop condition already rules out
          # the end of the input here, so the raise is only a safety net.
          raise if eos?
          encoder.text_token getch, :error

        end

      end

      encoder
    end

  end

end
end
|
Loading…
Reference in New Issue