216 lines
6.4 KiB
Ruby
216 lines
6.4 KiB
Ruby
|
module CodeRay
|
||
|
|
||
|
# GZip library for writing and reading token dumps.
|
||
|
autoload :GZip, 'coderay/helpers/gzip'
|
||
|
|
||
|
# = Tokens TODO: Rewrite!
|
||
|
#
|
||
|
# The Tokens class represents a list of tokens returned from
|
||
|
# a Scanner.
|
||
|
#
|
||
|
# A token is not a special object, just a two-element Array
|
||
|
# consisting of
|
||
|
# * the _token_ _text_ (the original source of the token in a String) or
|
||
|
# a _token_ _action_ (begin_group, end_group, begin_line, end_line)
|
||
|
# * the _token_ _kind_ (a Symbol representing the type of the token)
|
||
|
#
|
||
|
# A token looks like this:
|
||
|
#
|
||
|
# ['# It looks like this', :comment]
|
||
|
# ['3.1415926', :float]
|
||
|
# ['$^', :error]
|
||
|
#
|
||
|
# Some scanners also yield sub-tokens, represented by special
|
||
|
# token actions, namely begin_group and end_group.
|
||
|
#
|
||
|
# The Ruby scanner, for example, splits "a string" into:
|
||
|
#
|
||
|
# [
|
||
|
# [:begin_group, :string],
|
||
|
# ['"', :delimiter],
|
||
|
# ['a string', :content],
|
||
|
# ['"', :delimiter],
|
||
|
# [:end_group, :string]
|
||
|
# ]
|
||
|
#
|
||
|
# Tokens is the interface between Scanners and Encoders:
|
||
|
# The input is split and saved into a Tokens object. The Encoder
|
||
|
# then builds the output from this object.
|
||
|
#
|
||
|
# Thus, the syntax below becomes clear:
|
||
|
#
|
||
|
# CodeRay.scan('price = 2.59', :ruby).html
|
||
|
# # the Tokens object is here -------^
|
||
|
#
|
||
|
# See how small it is? ;)
|
||
|
#
|
||
|
# Tokens gives you the power to handle pre-scanned code very easily:
|
||
|
# You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
|
||
|
# that you put in your DB.
|
||
|
#
|
||
|
# It also allows you to generate tokens directly (without using a scanner),
|
||
|
# to load them from a file, and still use any Encoder that CodeRay provides.
|
||
|
class Tokens < Array

  # The Scanner instance that created the tokens.
  attr_accessor :scanner

  # Encode the tokens using encoder.
  #
  # encoder can be
  # * a symbol like :html or :statistic
  # * an Encoder class
  # * an Encoder object
  #
  # options are passed to the encoder (both to its constructor, when this
  # method has to instantiate it, and to its +encode_tokens+ call).
  #
  # Returns whatever the encoder's +encode_tokens+ returns.
  def encode encoder, options = {}
    if encoder.respond_to? :to_sym
      # A name: look the encoder class up in Encoders and instantiate it.
      encoder = Encoders[encoder].new options
    elsif encoder.is_a?(Class) && !encoder.respond_to?(:encode_tokens)
      # An encoder class: instantiate it like a looked-up one. Classes that
      # already offer a class-level encode_tokens are used as they are, so
      # existing callers keep their behavior.
      encoder = encoder.new options
    end
    encoder.encode_tokens self, options
  end

  # Turn tokens into a string by concatenating them.
  #
  # This encodes the tokens with a plain CodeRay::Encoders::Encoder instance.
  def to_s
    encode CodeRay::Encoders::Encoder.new
  end

  # Redirects unknown methods to encoder calls.
  #
  # For example, if you call +tokens.html+, the HTML encoder
  # is used to highlight the tokens.
  #
  # If no encoder plugin is found for the method name
  # (PluginHost::PluginNotFound), the call falls back to the default
  # method_missing behavior.
  def method_missing meth, options = {}
    encode meth, options
  rescue PluginHost::PluginNotFound
    super
  end

  # Split the tokens into parts of the given +sizes+.
  #
  # The result will be an Array of Tokens objects. The parts have
  # the text size specified by the parameter. In addition, each
  # part closes all opened tokens. This is useful to insert tokens
  # between them.
  #
  # Sizes of 0 produce empty parts. Text that does not fit into the given
  # sizes goes into one additional, final part. If fewer parts than sizes
  # were produced, empty Tokens objects are appended.
  #
  # Raises ArgumentError if a Symbol other than begin_group, end_group,
  # begin_line or end_line appears in a content slot, or if a content slot
  # holds something that is neither a String nor a Symbol.
  #
  # This method is used by @Scanner#tokenize@ when called with an Array
  # of source strings. The Diff encoder uses it for inline highlighting.
  def split_into_parts *sizes
    parts = []
    opened = []       # stack of [action, kind] pairs that are currently open
    content = nil     # first half of the current token; nil between tokens
    part = Tokens.new
    part_size = 0     # text size collected in the current part so far
    size = sizes.first
    i = 0
    # The list is flat: content (text or action) and kind alternate.
    each do |item|
      case content
      when nil
        content = item
      when String
        if size && part_size + content.size > size  # token must be cut
          if part_size < size  # some part of the token goes into this part
            content = content.dup  # content may not be safe to change
            part << content.slice!(0, size - part_size) << item
          end
          # close all open groups and lines...
          closing = opened.reverse.flatten.map do |content_or_kind|
            case content_or_kind
            when :begin_group
              :end_group
            when :begin_line
              :end_line
            else
              content_or_kind
            end
          end
          part.concat closing
          # Finish the current part and advance to the next size, emitting
          # an empty part for every size that is not positive.
          loop do
            parts << part
            part = Tokens.new
            size = sizes[i += 1]
            break if size.nil? || size > 0
          end
          # ...and open them again.
          part.concat opened.flatten
          part_size = 0
          # Process the rest of the text against the new part.
          redo unless content.empty?
        else
          part << content << item
          part_size += content.size
        end
        content = nil
      when Symbol
        case content
        when :begin_group, :begin_line
          opened << [content, item]
        when :end_group, :end_line
          opened.pop
        else
          raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
        end
        part << content << item
        content = nil
      else
        raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
      end
    end
    parts << part
    parts << Tokens.new while parts.size < sizes.size
    parts
  end

  # Dumps the object into a String that can be saved
  # in files or databases.
  #
  # The dump is created with Marshal.dump;
  # In addition, it is gzipped using GZip.gzip.
  #
  # The returned String object includes Undumping
  # so it has an #undump method. See Tokens.load.
  #
  # You can configure the level of compression,
  # but the default value 7 should be what you want
  # in most cases as it is a good compromise between
  # speed and compression rate.
  #
  # See GZip module.
  def dump gzip_level = 7
    marshalled = Marshal.dump self
    zipped = GZip.gzip marshalled, gzip_level
    zipped.extend Undumping
  end

  # Return the actual number of tokens.
  #
  # Every token occupies two slots of the underlying Array
  # (text or action, then kind), hence half of the size.
  def count
    size / 2
  end

  # Include this module to give an object an #undump
  # method.
  #
  # The string returned by Tokens.dump includes Undumping.
  module Undumping
    # Calls Tokens.load with itself.
    def undump
      Tokens.load self
    end
  end

  # Unzip the dump using GZip.gunzip, then restore the
  # object using Marshal.load.
  #
  # The result is commonly a Tokens object, but
  # this is not guaranteed.
  #
  # WARNING: Marshal.load can instantiate arbitrary objects. Only load
  # dumps that come from a trusted source.
  def self.load dump
    Marshal.load GZip.gunzip(dump)
  end

  # Append a text token: text_token text, kind.
  alias text_token push

  # Append the action/kind pair that opens or closes a group or a line.
  def begin_group kind; push :begin_group, kind end
  def end_group kind; push :end_group, kind end
  def begin_line kind; push :begin_line, kind end
  def end_line kind; push :end_line, kind end

  # Append all items of another token list.
  alias tokens concat

end
|
||
|
|
||
|
end
|