Added syntax highlighting for repository files (using CodeRay).

Supported languages: c, ruby, rhtml, yaml, html, xml.

git-svn-id: http://redmine.rubyforge.org/svn/trunk@644 e93f8b46-1217-0410-a6f0-8f06a7374b81
This commit is contained in:
Jean-Philippe Lang 2007-08-15 20:20:18 +00:00
parent a5849ee044
commit 889d50089d
53 changed files with 5813 additions and 13 deletions

View File

@ -15,7 +15,15 @@
# along with this program; if not, write to the Free Software # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
require 'coderay'
require 'coderay/helpers/file_type'
module RepositoriesHelper module RepositoriesHelper
# Renders +content+ as HTML for display.
#
# Looks up a scanner type for the file +name+ via CodeRay::FileType. When a
# type is found, +content+ is scanned with CodeRay and its HTML rendering is
# returned; otherwise +content+ is returned passed through +h+.
def syntax_highlight(name, content)
  scanner_type = CodeRay::FileType[name]
  # No known type for this file name: fall back to the +h+ helper.
  return h(content) unless scanner_type

  CodeRay.scan(content, scanner_type).html
end
def repository_field_tags(form, repository) def repository_field_tags(form, repository)
method = repository.class.name.demodulize.underscore + "_field_tags" method = repository.class.name.demodulize.underscore + "_field_tags"
send(method, form, repository) if repository.is_a?(Repository) && respond_to?(method) send(method, form, repository) if repository.is_a?(Repository) && respond_to?(method)

View File

@ -14,7 +14,7 @@
<% cache(@cache_key) do %> <% cache(@cache_key) do %>
<% @diff.each do |table_file| %> <% @diff.each do |table_file| %>
<% if @diff_type == 'sbs' %> <% if @diff_type == 'sbs' %>
<table class="list"> <table class="list CodeRay">
<thead> <thead>
<tr> <tr>
<th colspan="4" class="list-filename"> <th colspan="4" class="list-filename">
@ -33,13 +33,13 @@
<%= table_file[key].nb_line_left %> <%= table_file[key].nb_line_left %>
</th> </th>
<td class="line-code <%= table_file[key].type_diff_left %>"> <td class="line-code <%= table_file[key].type_diff_left %>">
<%= table_file[key].line_left %> <pre><%= table_file[key].line_left %></pre>
</td> </td>
<th class="line-num"> <th class="line-num">
<%= table_file[key].nb_line_right %> <%= table_file[key].nb_line_right %>
</th> </th>
<td class="line-code <%= table_file[key].type_diff_right %>"> <td class="line-code <%= table_file[key].type_diff_right %>">
<%= table_file[key].line_right %> <pre><%= table_file[key].line_right %></pre>
</td> </td>
</tr> </tr>
<% end %> <% end %>
@ -47,7 +47,7 @@
</table> </table>
<% else %> <% else %>
<table class="list"> <table class="list CodeRay">
<thead> <thead>
<tr> <tr>
<th colspan="3" class="list-filename"> <th colspan="3" class="list-filename">
@ -71,11 +71,11 @@
</th> </th>
<% if table_file[key].line_left.empty? %> <% if table_file[key].line_left.empty? %>
<td class="line-code <%= table_file[key].type_diff_right %>"> <td class="line-code <%= table_file[key].type_diff_right %>">
<%= table_file[key].line_right %> <pre><%= table_file[key].line_right %></pre>
</td> </td>
<% else %> <% else %>
<td class="line-code <%= table_file[key].type_diff_left %>"> <td class="line-code <%= table_file[key].type_diff_left %>">
<%= table_file[key].line_left %> <pre><%= table_file[key].line_left %></pre>
</td> </td>
<% end %> <% end %>
</tr> </tr>

View File

@ -1,6 +1,6 @@
<h2><%= render :partial => 'navigation', :locals => { :path => @path, :kind => 'file', :revision => @rev } %></h2> <h2><%= render :partial => 'navigation', :locals => { :path => @path, :kind => 'file', :revision => @rev } %></h2>
<table class="list"> <table class="list CodeRay">
<thead> <thead>
<tr> <tr>
<th colspan="2" class="list-filename"><%= @path %></th> <th colspan="2" class="list-filename"><%= @path %></th>
@ -8,10 +8,10 @@
</thead> </thead>
<tbody> <tbody>
<% line_num = 1 %> <% line_num = 1 %>
<% @content.each_line do |line| %> <% syntax_highlight(@path, @content).each_line do |line| %>
<tr> <tr>
<th class="line-num"><%= line_num %></th> <th class="line-num"><%= line_num %></th>
<td class="line-code"><%= h(line).gsub(/\s/, '&nbsp;') %></td> <td class="line-code"><pre><%= line %></pre></td>
</tr> </tr>
<% line_num += 1 %> <% line_num += 1 %>
<% end %> <% end %>

View File

@ -22,9 +22,12 @@ module Redmine
'text/plain' => 'txt', 'text/plain' => 'txt',
'text/css' => 'css', 'text/css' => 'css',
'text/html' => 'html,htm,xhtml', 'text/html' => 'html,htm,xhtml',
'text/x-c' => 'c,cpp,h',
'text/x-javascript' => 'js', 'text/x-javascript' => 'js',
'text/x-html-template' => 'rhtml', 'text/x-html-template' => 'rhtml',
'text/x-ruby' => 'rb,ruby', 'text/x-ruby' => 'rb,rbw,ruby,rake',
'text/xml' => 'xml',
'text/yaml' => 'yml,yaml',
'image/gif' => 'gif', 'image/gif' => 'gif',
'image/jpeg' => 'jpg,jpeg,jpe', 'image/jpeg' => 'jpg,jpeg,jpe',
'image/png' => 'png', 'image/png' => 'png',

View File

@ -301,7 +301,7 @@ module Redmine
# Escape the HTML for the diff # Escape the HTML for the diff
def escapeHTML(line) def escapeHTML(line)
CGI.escapeHTML(line).gsub(/\s/, '&nbsp;') CGI.escapeHTML(line)
end end
def parse_line (line, type="inline") def parse_line (line, type="inline")

View File

@ -17,8 +17,7 @@ tr.spacing {
} }
.line-code { .line-code {
font-family: "Courier New", monospace; font-size: 1.4em;
font-size: 1em;
} }
table.list thead th.list-filename { table.list thead th.list-filename {
@ -26,3 +25,99 @@ table.list thead th.list-filename {
font-weight: bolder; font-weight: bolder;
text-align: left; text-align: left;
} }
/************* Coderay styles *************/
.CodeRay {
background-color: #fafafa;
}
.CodeRay pre { margin: 0px }
span.CodeRay { white-space: pre; border: 0px; padding: 2px }
.CodeRay .no { padding: 0px 4px }
.CodeRay .code { width: 100% }
ol.CodeRay { font-size: 10pt }
ol.CodeRay li { white-space: pre }
.CodeRay .code pre { overflow: auto }
.CodeRay .debug { color:white ! important; background:blue ! important; }
.CodeRay .af { color:#00C }
.CodeRay .an { color:#007 }
.CodeRay .av { color:#700 }
.CodeRay .aw { color:#C00 }
.CodeRay .bi { color:#509; font-weight:bold }
.CodeRay .c { color:#666; }
.CodeRay .ch { color:#04D }
.CodeRay .ch .k { color:#04D }
.CodeRay .ch .dl { color:#039 }
.CodeRay .cl { color:#B06; font-weight:bold }
.CodeRay .co { color:#036; font-weight:bold }
.CodeRay .cr { color:#0A0 }
.CodeRay .cv { color:#369 }
.CodeRay .df { color:#099; font-weight:bold }
.CodeRay .di { color:#088; font-weight:bold }
.CodeRay .dl { color:black }
.CodeRay .do { color:#970 }
.CodeRay .ds { color:#D42; font-weight:bold }
.CodeRay .e { color:#666; font-weight:bold }
.CodeRay .en { color:#800; font-weight:bold }
.CodeRay .er { color:#F00; background-color:#FAA }
.CodeRay .ex { color:#F00; font-weight:bold }
.CodeRay .fl { color:#60E; font-weight:bold }
.CodeRay .fu { color:#06B; font-weight:bold }
.CodeRay .gv { color:#d70; font-weight:bold }
.CodeRay .hx { color:#058; font-weight:bold }
.CodeRay .i { color:#00D; font-weight:bold }
.CodeRay .ic { color:#B44; font-weight:bold }
.CodeRay .il { background: #eee }
.CodeRay .il .il { background: #ddd }
.CodeRay .il .il .il { background: #ccc }
.CodeRay .il .idl { font-weight: bold; color: #888 }
.CodeRay .in { color:#B2B; font-weight:bold }
.CodeRay .iv { color:#33B }
.CodeRay .la { color:#970; font-weight:bold }
.CodeRay .lv { color:#963 }
.CodeRay .oc { color:#40E; font-weight:bold }
.CodeRay .of { color:#000; font-weight:bold }
.CodeRay .op { }
.CodeRay .pc { color:#038; font-weight:bold }
.CodeRay .pd { color:#369; font-weight:bold }
.CodeRay .pp { color:#579 }
.CodeRay .pt { color:#339; font-weight:bold }
.CodeRay .r { color:#080; font-weight:bold }
.CodeRay .rx { background-color:#fff0ff }
.CodeRay .rx .k { color:#808 }
.CodeRay .rx .dl { color:#404 }
.CodeRay .rx .mod { color:#C2C }
.CodeRay .rx .fu { color:#404; font-weight: bold }
.CodeRay .s { background-color:#fff0f0 }
.CodeRay .s .s { background-color:#ffe0e0 }
.CodeRay .s .s .s { background-color:#ffd0d0 }
.CodeRay .s .k { color:#D20 }
.CodeRay .s .dl { color:#710 }
.CodeRay .sh { background-color:#f0fff0 }
.CodeRay .sh .k { color:#2B2 }
.CodeRay .sh .dl { color:#161 }
.CodeRay .sy { color:#A60 }
.CodeRay .sy .k { color:#A60 }
.CodeRay .sy .dl { color:#630 }
.CodeRay .ta { color:#070 }
.CodeRay .tf { color:#070; font-weight:bold }
.CodeRay .ts { color:#D70; font-weight:bold }
.CodeRay .ty { color:#339; font-weight:bold }
.CodeRay .v { color:#036 }
.CodeRay .xt { color:#444 }

View File

@ -0,0 +1,53 @@
= CodeRay - Trunk folder structure
== bench - Benchmarking system
All benchmarking stuff goes here.
Test inputs are stored in files named <code>example.<lang></code>.
Test outputs go to <code>bench/test.<encoder-default-file-extension></code>.
Run <code>bench/bench.rb</code> to get a usage description.
Run <code>rake bench</code> to perform an example benchmark.
== bin - Scripts
Executable files for CodeRay.
== demo - Demos and functional tests
Demonstration scripts to show off CodeRay's features.
Run them as functional tests with <code>rake test:demos</code>.
== etc - Lots of stuff
Some additional files for CodeRay, mainly graphics and Vim scripts.
== gem_server - Gem output folder
For <code>rake gem</code>.
== lib - CodeRay library code
This is the base directory for the CodeRay library.
== rake_helpers - Rake helper libraries
Some files to enhance Rake, including the Autumnal Rdoc template and some scripts.
== test - Tests
Tests for the scanners.
Each language has its own subfolder and sub-suite.
Run with <code>rake test</code>.

504
vendor/plugins/coderay-0.7.6.227/LICENSE vendored Normal file
View File

@ -0,0 +1,504 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 2.1, February 1999
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
[This is the first released version of the Lesser GPL. It also counts
as the successor of the GNU Library Public License, version 2, hence
the version number 2.1.]
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
Licenses are intended to guarantee your freedom to share and change
free software--to make sure the software is free for all its users.
This license, the Lesser General Public License, applies to some
specially designated software packages--typically libraries--of the
Free Software Foundation and other authors who decide to use it. You
can use it too, but we suggest you first think carefully about whether
this license or the ordinary General Public License is the better
strategy to use in any particular case, based on the explanations below.
When we speak of free software, we are referring to freedom of use,
not price. Our General Public Licenses are designed to make sure that
you have the freedom to distribute copies of free software (and charge
for this service if you wish); that you receive source code or can get
it if you want it; that you can change the software and use pieces of
it in new free programs; and that you are informed that you can do
these things.
To protect your rights, we need to make restrictions that forbid
distributors to deny you these rights or to ask you to surrender these
rights. These restrictions translate to certain responsibilities for
you if you distribute copies of the library or if you modify it.
For example, if you distribute copies of the library, whether gratis
or for a fee, you must give the recipients all the rights that we gave
you. You must make sure that they, too, receive or can get the source
code. If you link other code with the library, you must provide
complete object files to the recipients, so that they can relink them
with the library after making changes to the library and recompiling
it. And you must show them these terms so they know their rights.
We protect your rights with a two-step method: (1) we copyright the
library, and (2) we offer you this license, which gives you legal
permission to copy, distribute and/or modify the library.
To protect each distributor, we want to make it very clear that
there is no warranty for the free library. Also, if the library is
modified by someone else and passed on, the recipients should know
that what they have is not the original version, so that the original
author's reputation will not be affected by problems that might be
introduced by others.
Finally, software patents pose a constant threat to the existence of
any free program. We wish to make sure that a company cannot
effectively restrict the users of a free program by obtaining a
restrictive license from a patent holder. Therefore, we insist that
any patent license obtained for a version of the library must be
consistent with the full freedom of use specified in this license.
Most GNU software, including some libraries, is covered by the
ordinary GNU General Public License. This license, the GNU Lesser
General Public License, applies to certain designated libraries, and
is quite different from the ordinary General Public License. We use
this license for certain libraries in order to permit linking those
libraries into non-free programs.
When a program is linked with a library, whether statically or using
a shared library, the combination of the two is legally speaking a
combined work, a derivative of the original library. The ordinary
General Public License therefore permits such linking only if the
entire combination fits its criteria of freedom. The Lesser General
Public License permits more lax criteria for linking other code with
the library.
We call this license the "Lesser" General Public License because it
does Less to protect the user's freedom than the ordinary General
Public License. It also provides other free software developers Less
of an advantage over competing non-free programs. These disadvantages
are the reason we use the ordinary General Public License for many
libraries. However, the Lesser license provides advantages in certain
special circumstances.
For example, on rare occasions, there may be a special need to
encourage the widest possible use of a certain library, so that it becomes
a de-facto standard. To achieve this, non-free programs must be
allowed to use the library. A more frequent case is that a free
library does the same job as widely used non-free libraries. In this
case, there is little to gain by limiting the free library to free
software only, so we use the Lesser General Public License.
In other cases, permission to use a particular library in non-free
programs enables a greater number of people to use a large body of
free software. For example, permission to use the GNU C Library in
non-free programs enables many more people to use the whole GNU
operating system, as well as its variant, the GNU/Linux operating
system.
Although the Lesser General Public License is Less protective of the
users' freedom, it does ensure that the user of a program that is
linked with the Library has the freedom and the wherewithal to run
that program using a modified version of the Library.
The precise terms and conditions for copying, distribution and
modification follow. Pay close attention to the difference between a
"work based on the library" and a "work that uses the library". The
former contains code derived from the library, whereas the latter must
be combined with the library in order to run.
GNU LESSER GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License Agreement applies to any software library or other
program which contains a notice placed by the copyright holder or
other authorized party saying it may be distributed under the terms of
this Lesser General Public License (also called "this License").
Each licensee is addressed as "you".
A "library" means a collection of software functions and/or data
prepared so as to be conveniently linked with application programs
(which use some of those functions and data) to form executables.
The "Library", below, refers to any such software library or work
which has been distributed under these terms. A "work based on the
Library" means either the Library or any derivative work under
copyright law: that is to say, a work containing the Library or a
portion of it, either verbatim or with modifications and/or translated
straightforwardly into another language. (Hereinafter, translation is
included without limitation in the term "modification".)
"Source code" for a work means the preferred form of the work for
making modifications to it. For a library, complete source code means
all the source code for all modules it contains, plus any associated
interface definition files, plus the scripts used to control compilation
and installation of the library.
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running a program using the Library is not restricted, and output from
such a program is covered only if its contents constitute a work based
on the Library (independent of the use of the Library in a tool for
writing it). Whether that is true depends on what the Library does
and what the program that uses the Library does.
1. You may copy and distribute verbatim copies of the Library's
complete source code as you receive it, in any medium, provided that
you conspicuously and appropriately publish on each copy an
appropriate copyright notice and disclaimer of warranty; keep intact
all the notices that refer to this License and to the absence of any
warranty; and distribute a copy of this License along with the
Library.
You may charge a fee for the physical act of transferring a copy,
and you may at your option offer warranty protection in exchange for a
fee.
2. You may modify your copy or copies of the Library or any portion
of it, thus forming a work based on the Library, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) The modified work must itself be a software library.
b) You must cause the files modified to carry prominent notices
stating that you changed the files and the date of any change.
c) You must cause the whole of the work to be licensed at no
charge to all third parties under the terms of this License.
d) If a facility in the modified Library refers to a function or a
table of data to be supplied by an application program that uses
the facility, other than as an argument passed when the facility
is invoked, then you must make a good faith effort to ensure that,
in the event an application does not supply such function or
table, the facility still operates, and performs whatever part of
its purpose remains meaningful.
(For example, a function in a library to compute square roots has
a purpose that is entirely well-defined independent of the
application. Therefore, Subsection 2d requires that any
application-supplied function or table used by this function must
be optional: if the application does not supply it, the square
root function must still compute square roots.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Library,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Library, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote
it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Library.
In addition, mere aggregation of another work not based on the Library
with the Library (or with a work based on the Library) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may opt to apply the terms of the ordinary GNU General Public
License instead of this License to a given copy of the Library. To do
this, you must alter all the notices that refer to this License, so
that they refer to the ordinary GNU General Public License, version 2,
instead of to this License. (If a newer version than version 2 of the
ordinary GNU General Public License has appeared, then you can specify
that version instead if you wish.) Do not make any other change in
these notices.
Once this change is made in a given copy, it is irreversible for
that copy, so the ordinary GNU General Public License applies to all
subsequent copies and derivative works made from that copy.
This option is useful when you wish to copy part of the code of
the Library into a program that is not a library.
4. You may copy and distribute the Library (or a portion or
derivative of it, under Section 2) in object code or executable form
under the terms of Sections 1 and 2 above provided that you accompany
it with the complete corresponding machine-readable source code, which
must be distributed under the terms of Sections 1 and 2 above on a
medium customarily used for software interchange.
If distribution of object code is made by offering access to copy
from a designated place, then offering equivalent access to copy the
source code from the same place satisfies the requirement to
distribute the source code, even though third parties are not
compelled to copy the source along with the object code.
5. A program that contains no derivative of any portion of the
Library, but is designed to work with the Library by being compiled or
linked with it, is called a "work that uses the Library". Such a
work, in isolation, is not a derivative work of the Library, and
therefore falls outside the scope of this License.
However, linking a "work that uses the Library" with the Library
creates an executable that is a derivative of the Library (because it
contains portions of the Library), rather than a "work that uses the
library". The executable is therefore covered by this License.
Section 6 states terms for distribution of such executables.
When a "work that uses the Library" uses material from a header file
that is part of the Library, the object code for the work may be a
derivative work of the Library even though the source code is not.
Whether this is true is especially significant if the work can be
linked without the Library, or if the work is itself a library. The
threshold for this to be true is not precisely defined by law.
If such an object file uses only numerical parameters, data
structure layouts and accessors, and small macros and small inline
functions (ten lines or less in length), then the use of the object
file is unrestricted, regardless of whether it is legally a derivative
work. (Executables containing this object code plus portions of the
Library will still fall under Section 6.)
Otherwise, if the work is a derivative of the Library, you may
distribute the object code for the work under the terms of Section 6.
Any executables containing that work also fall under Section 6,
whether or not they are linked directly with the Library itself.
6. As an exception to the Sections above, you may also combine or
link a "work that uses the Library" with the Library to produce a
work containing portions of the Library, and distribute that work
under terms of your choice, provided that the terms permit
modification of the work for the customer's own use and reverse
engineering for debugging such modifications.
You must give prominent notice with each copy of the work that the
Library is used in it and that the Library and its use are covered by
this License. You must supply a copy of this License. If the work
during execution displays copyright notices, you must include the
copyright notice for the Library among them, as well as a reference
directing the user to the copy of this License. Also, you must do one
of these things:
a) Accompany the work with the complete corresponding
machine-readable source code for the Library including whatever
changes were used in the work (which must be distributed under
Sections 1 and 2 above); and, if the work is an executable linked
with the Library, with the complete machine-readable "work that
uses the Library", as object code and/or source code, so that the
user can modify the Library and then relink to produce a modified
executable containing the modified Library. (It is understood
that the user who changes the contents of definitions files in the
Library will not necessarily be able to recompile the application
to use the modified definitions.)
b) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (1) uses at run time a
copy of the library already present on the user's computer system,
rather than copying library functions into the executable, and (2)
will operate properly with a modified version of the library, if
the user installs one, as long as the modified version is
interface-compatible with the version that the work was made with.
c) Accompany the work with a written offer, valid for at
least three years, to give the same user the materials
specified in Subsection 6a, above, for a charge no more
than the cost of performing this distribution.
d) If distribution of the work is made by offering access to copy
from a designated place, offer equivalent access to copy the above
specified materials from the same place.
e) Verify that the user has already received a copy of these
materials or that you have already sent this user a copy.
For an executable, the required form of the "work that uses the
Library" must include any data and utility programs needed for
reproducing the executable from it. However, as a special exception,
the materials to be distributed need not include anything that is
normally distributed (in either source or binary form) with the major
components (compiler, kernel, and so on) of the operating system on
which the executable runs, unless that component itself accompanies
the executable.
It may happen that this requirement contradicts the license
restrictions of other proprietary libraries that do not normally
accompany the operating system. Such a contradiction means you cannot
use both them and the Library together in an executable that you
distribute.
7. You may place library facilities that are a work based on the
Library side-by-side in a single library together with other library
facilities not covered by this License, and distribute such a combined
library, provided that the separate distribution of the work based on
the Library and of the other library facilities is otherwise
permitted, and provided that you do these two things:
a) Accompany the combined library with a copy of the same work
based on the Library, uncombined with any other library
facilities. This must be distributed under the terms of the
Sections above.
b) Give prominent notice with the combined library of the fact
that part of it is a work based on the Library, and explaining
where to find the accompanying uncombined form of the same work.
8. You may not copy, modify, sublicense, link with, or distribute
the Library except as expressly provided under this License. Any
attempt otherwise to copy, modify, sublicense, link with, or
distribute the Library is void, and will automatically terminate your
rights under this License. However, parties who have received copies,
or rights, from you under this License will not have their licenses
terminated so long as such parties remain in full compliance.
9. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Library or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Library (or any work based on the
Library), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Library or works based on it.
10. Each time you redistribute the Library (or any work based on the
Library), the recipient automatically receives a license from the
original licensor to copy, distribute, link with or modify the Library
subject to these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties with
this License.
11. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Library at all. For example, if a patent
license would not permit royalty-free redistribution of the Library by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Library.
If any portion of this section is held invalid or unenforceable under any
particular circumstance, the balance of the section is intended to apply,
and the section as a whole is intended to apply in other circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
12. If the distribution and/or use of the Library is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Library under this License may add
an explicit geographical distribution limitation excluding those countries,
so that distribution is permitted only in or among countries not thus
excluded. In such case, this License incorporates the limitation as if
written in the body of this License.
13. The Free Software Foundation may publish revised and/or new
versions of the Lesser General Public License from time to time.
Such new versions will be similar in spirit to the present version,
but may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Library
specifies a version number of this License which applies to it and
"any later version", you have the option of following the terms and
conditions either of that version or of any later version published by
the Free Software Foundation. If the Library does not specify a
license version number, you may choose any version ever published by
the Free Software Foundation.
14. If you wish to incorporate parts of the Library into other free
programs whose distribution conditions are incompatible with these,
write to the author to ask for permission. For software which is
copyrighted by the Free Software Foundation, write to the Free
Software Foundation; we sometimes make exceptions for this. Our
decision will be guided by the two goals of preserving the free status
of all derivatives of our free software and of promoting the sharing
and reuse of software generally.
NO WARRANTY
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Libraries
If you develop a new library, and you want it to be of the greatest
possible use to the public, we recommend making it free software that
everyone can redistribute and change. You can do so by permitting
redistribution under these terms (or, alternatively, under the terms of the
ordinary General Public License).
To apply these terms, attach the following notices to the library. It is
safest to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least the
"copyright" line and a pointer to where the full notice is found.
<one line to give the library's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Also add information on how to contact you by electronic and paper mail.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the library, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the
library `Frob' (a library for tweaking knobs) written by James Random Hacker.
<signature of Ty Coon>, 1 April 1990
Ty Coon, President of Vice
That's all there is to it!

128
vendor/plugins/coderay-0.7.6.227/README vendored Normal file
View File

@ -0,0 +1,128 @@
= CodeRay
[- Tired of blue'n'gray? Try the original version of this documentation on
http://rd.cYcnus.de/coderay/doc (use Ctrl+Click to open it in its own frame.) -]
== About
CodeRay is a Ruby library for syntax highlighting.
Syntax highlighting means: You put your code in, and you get it back colored;
Keywords, strings, floats, comments - all in different colors.
And with line numbers.
*Syntax* *Highlighting*...
* makes code easier to read and maintain
* lets you detect syntax errors faster
* helps you to understand the syntax of a language
* looks nice
* is what everybody should have on their website
* solves all your problems and makes the girls run after you
Version: 0.7.4 (2006.october.20)
Author:: murphy (Kornelius Kalnbach)
Contact:: murphy rubychan de
Website:: coderay.rubychan.de[http://coderay.rubychan.de]
License:: GNU LGPL; see LICENSE file in the main directory.
Subversion:: $Id: README 219 2006-10-20 15:52:25Z murphy $
-----
== Installation
You need RubyGems[http://rubyforge.org/frs/?group_id=126].
% gem install coderay
Since CodeRay is still in beta stage, nightly builds may be useful:
% gem install coderay -rs rd.cYcnus.de/coderay
=== Dependencies
CodeRay needs Ruby 1.8 and the
strscan[http://www.ruby-doc.org/stdlib/libdoc/strscan/rdoc/index.htm]
library (part of the standard library.) It should also run with Ruby 1.9 and
yarv.
== Example Usage
(Forgive me, but this is not highlighted.)
require 'coderay'
tokens = CodeRay.scan "puts 'Hello, world!'", :ruby
page = tokens.html :line_numbers => :inline, :wrap => :page
puts page
== Documentation
See CodeRay.
Please report errors in this documentation to <coderay cycnus de>.
-----
== Credits
=== Special Thanks to
* licenser (Heinz N. Gies) for ending my QBasic career, inventing the Coder
project and the input/output plugin system.
CodeRay would not exist without him.
=== Thanks to
* Caleb Clausen for writing RubyLexer (see
http://rubyforge.org/projects/rubylexer) and lots of very interesting mail
traffic
* birkenfeld (Georg Brandl) and mitsuhiko (Armin Ronacher) for PyKleur. You
guys rock!
* Jamis Buck for writing Syntax (see http://rubyforge.org/projects/syntax)
I got some useful ideas from it.
* Doug Kearns and everyone else who worked on ruby.vim - it not only helped me
coding CodeRay, but also gave me a wonderful target to reach for the Ruby
scanner.
* everyone who used CodeBB on http://www.rubyforen.de and
http://www.infhu.de/mx
* iGEL, magichisoka, manveru, WoNáDo and everyone I forgot from rubyforen.de
* Daniel and Dethix from ruby-mine.de
* Dookie (who is no longer with us...) and Leonidas from
http://www.python-forum.de
* Andreas Schwarz for finding out that CaseIgnoringWordList was not case
ignoring! Such things really make you write tests.
* matz and all Ruby gods and gurus
* The inventors of: the computer, the internet, the true color display, HTML &
CSS, VIM, RUBY, pizza, microwaves, guitars, scouting, programming, anime,
manga, coke and green ice tea.
Where would we be without all those people?
=== Created using
* Ruby[http://ruby-lang.org/]
* Chihiro (my Sony VAIO laptop), Henrietta (my new MacBook) and
Seras (my Athlon 2200+ tower)
* VIM[http://vim.org] and TextMate[http://macromates.com]
* RDE[http://homepage2.nifty.com/sakazuki/rde_e.html]
* Microsoft Windows (yes, I confess!) and MacOS X
* Firefox[http://www.mozilla.org/products/firefox/] and
Thunderbird[http://www.mozilla.org/products/thunderbird/]
* Rake[http://rake.rubyforge.org/]
* RubyGems[http://docs.rubygems.org/]
* {Subversion/TortoiseSVN}[http://tortoisesvn.tigris.org/] using Apache via
XAMPP[http://www.apachefriends.org/en/xampp.html]
* RDoc (though I'm quite unsatisfied with it)
* GNUWin32, MinGW and some other tools to make the shell under windows a bit
more useful
* Term::ANSIColor[http://term-ansicolor.rubyforge.org/]
---
* As you can see, CodeRay was created under heavy use of *free* software.
* So CodeRay is also *free*.
* If you use CodeRay to create software, think about making this software
*free*, too.
* Thanks :)

View File

@ -0,0 +1,82 @@
#!/usr/bin/env ruby
# CodeRay Executable
#
# Version: 0.1
# Author: murphy
# Writes +msg+ to standard error, followed by a newline.
# An Array is printed one element per line.
def err(msg)
  $stderr.puts(msg)
end
# Main program.
#
# Usage (see the usage text below):
#   coderay -<lang> [-<format>] < file > output   # highlight stdin
#   coderay file [-<format>]                      # highlight a file
#
# In file mode the result is written to "<file>.<extension of the format>";
# an existing file is never overwritten. In stdin mode the result goes to
# standard output. Any StandardError is reported on stderr and the process
# exits with status 1.
begin
  require 'coderay'

  if ARGV.empty?
    puts <<-USAGE
CodeRay #{CodeRay::VERSION} (http://rd.cYcnus.de/coderay)
Usage:
coderay -<lang> [-<format>] < file > output
coderay file [-<format>]
Example:
coderay -ruby -statistic < foo.rb
coderay codegen.c # generates codegen.c.html
    USAGE
  end

  first, second = ARGV

  if first
    if first[/-(\w+)/] == first
      # "-ruby" style argument: the language is given, the code comes from stdin.
      lang = $1.to_sym
      input = $stdin.read
      tokens = :scan
    elsif first == '-'
      # A bare "-" names no language. The pattern above did not match, so $1
      # is nil here; fail with a readable message instead of a NoMethodError.
      raise 'No language given (must be -<lang>).'
    else
      # Anything else is a file name; the language is guessed by scan_file.
      file = first
      tokens = CodeRay.scan_file file
      output_filename = file
    end
  else
    puts 'No lang/file given.'
    exit 1
  end

  if second
    if second[/-(\w+)/] == second
      format = $1.to_sym
    else
      raise 'Invalid format (must be -xxx).'
    end
  else
    $stderr.puts 'No format given; setting to default (HTML Page)'
    format = :page
  end

  # TODO: allow streaming
  if tokens == :scan
    output = CodeRay::Duo[lang => format].highlight input #, :stream => true
  else
    output = tokens.encode format
  end

  if output_filename
    output_filename += '.' + CodeRay::Encoders[format]::FILE_EXTENSION
    if File.exist? output_filename
      err 'File %s already exists.' % output_filename
      exit
    end
    # The block form closes (and flushes) the file even if printing fails.
    File.open(output_filename, 'w') { |out| out.print output }
  else
    $stdout.print output
  end

rescue => boom
  err "Error: #{boom.message}\n"
  err boom.backtrace
  err '-' * 50
  err ARGV
  exit 1
end

View File

@ -0,0 +1,4 @@
#!/usr/bin/env ruby
# Prints the stylesheet string of a default-constructed CSS object of the
# :html encoder to standard output.
require 'coderay'
puts CodeRay::Encoders[:html]::CSS.new.stylesheet

View File

@ -0,0 +1,320 @@
# = CodeRay Library
#
# $Id: coderay.rb 227 2007-04-24 12:26:18Z murphy $
#
# CodeRay is a Ruby library for syntax highlighting.
#
# I try to make CodeRay easy to use and intuitive, but at the same time fully featured, complete,
# fast and efficient.
#
# See README.
#
# It consists mainly of
# * the main engine: CodeRay (Scanners::Scanner, Tokens/TokenStream, Encoders::Encoder), PluginHost
# * the scanners in CodeRay::Scanners
# * the encoders in CodeRay::Encoders
#
# Here's a fancy graphic to light up this gray docu:
#
# http://rd.cYcnus.de/coderay/scheme.png
#
# == Documentation
#
# See CodeRay, Encoders, Scanners, Tokens.
#
# == Usage
#
# Remember you need RubyGems to use CodeRay, unless you have it in your load path. Run Ruby with
# -rubygems option if required.
#
# === Highlight Ruby code in a string as html
#
# require 'coderay'
# print CodeRay.scan('puts "Hello, world!"', :ruby).html
#
# # prints something like this:
# puts <span class="s">&quot;Hello, world!&quot;</span>
#
#
# === Highlight C code from a file in a html div
#
# require 'coderay'
# print CodeRay.scan(File.read('ruby.h'), :c).div
# print CodeRay.scan_file('ruby.h').html.div
#
# You can include this div in your page. The used CSS styles can be printed with
#
# % coderay_stylesheet
#
# === Highlight without typing too much
#
# If you are one of the hasty (or lazy, or extremely curious) people, just run this file:
#
# % ruby -rubygems /path/to/coderay/coderay.rb > example.html
#
# and look at the file it created in your browser.
#
# = CodeRay Module
#
# The CodeRay module provides convenience methods for the engine.
#
# * The +lang+ and +format+ arguments select Scanner and Encoder to use. These are
# simply lower-case symbols, like <tt>:python</tt> or <tt>:html</tt>.
# * All methods take an optional hash as last parameter, +options+, that is sent to
# the Encoder / Scanner.
# * Input and language are always sorted in this order: +code+, +lang+.
# (This is in alphabetical order, if you need a mnemonic ;)
#
# You should be able to highlight everything you want just using these methods;
# so there is no need to dive into CodeRay's deep class hierarchy.
#
# The examples in the demo directory demonstrate common cases using this interface.
#
# = Basic Access Ways
#
# Read this to get a general view what CodeRay provides.
#
# == Scanning
#
# Scanning means analysing an input string, splitting it up into Tokens.
# Each Token knows about what type it is: string, comment, class name, etc.
#
# Each +lang+ (language) has its own Scanner; for example, <tt>:ruby</tt> code is
# handled by CodeRay::Scanners::Ruby.
#
# CodeRay.scan:: Scan a string in a given language into Tokens.
# This is the most common method to use.
# CodeRay.scan_file:: Scan a file and guess the language using FileType.
#
# The Tokens object you get from these methods can encode itself; see Tokens.
#
# == Encoding
#
# Encoding means compiling Tokens into an output. This can be colored HTML or
# LaTeX, a textual statistic or just the number of non-whitespace tokens.
#
# Each Encoder provides output in a specific +format+, so you select Encoders via
# formats like <tt>:html</tt> or <tt>:statistic</tt>.
#
# CodeRay.encode:: Scan and encode a string in a given language.
# CodeRay.encode_tokens:: Encode the given tokens.
# CodeRay.encode_file:: Scan a file, guess the language using FileType and encode it.
#
# == Streaming
#
# Streaming saves RAM by running Scanner and Encoder in some sort of
# pipe mode; see TokenStream.
#
# CodeRay.scan_stream:: Scan in stream mode.
#
# == All-in-One Encoding
#
# CodeRay.encode:: Highlight a string with a given input and output format.
#
# == Instantiating
#
# You can use an Encoder instance to highlight multiple inputs. This way, the setup
# for this Encoder must only be done once.
#
# CodeRay.encoder:: Create an Encoder instance with format and options.
# CodeRay.scanner:: Create a Scanner instance for lang, with '' as default code.
#
# To make use of CodeRay.scanner, use CodeRay::Scanner::code=.
#
# The scanning methods provide more flexibility; we recommend to use these.
#
# == Reusing Scanners and Encoders
#
# If you want to re-use scanners and encoders (because that is faster), see
# CodeRay::Duo for the most convenient (and recommended) interface.
module CodeRay
# Version: Major.Minor.Teeny[.Revision]
# Major: 0 for pre-release
# Minor: odd for beta, even for stable
# Teeny: development state
# Revision: Subversion Revision number (generated on rake)
VERSION = '0.7.6'
require 'coderay/tokens'
require 'coderay/scanner'
require 'coderay/encoder'
require 'coderay/duo'
require 'coderay/style'
class << self
# Tokenizes +code+ (a String) with the Scanner registered for +lang+.
#
# +options+ and an optional block are handed to the Scanner's constructor;
# the return value is whatever the scanner's +tokenize+ returns.
#
# This is the simplest way to use CodeRay. Example:
#  require 'coderay'
#  page = CodeRay.scan("puts 'Hello, world!'", :ruby).html
#
# See also demo/demo_simple.
def scan(code, lang, options = {}, &block)
  scanner_class = Scanners[lang]
  scanner_class.new(code, options, &block).tokenize
end
# Scans +filename+ (a path to a code file) with the Scanner for +lang+.
#
# If +lang+ is :auto or omitted, the CodeRay::FileType module is used to
# determine it. If it cannot find out what type it is, it uses
# CodeRay::Scanners::Plaintext.
#
# +options+ and the optional block are passed on unchanged to CodeRay.scan,
# whose result is returned.
#
# Example:
#  require 'coderay'
#  page = CodeRay.scan_file('some_c_code.c').html
def scan_file filename, lang = :auto, options = {}, &block
  file = IO.read filename
  if lang == :auto
    # Loaded lazily: the file type helper is only needed for guessing.
    require 'coderay/helpers/file_type'
    lang = FileType.fetch filename, :plaintext, true
  end
  # Pass the caller's options through. (Writing "options = {}" at the call
  # site would reassign the local and silently drop them.)
  scan file, lang, options, &block
end
# Scan the +code+ (a string) with the scanner for +lang+ in streaming mode.
#
# Calls scan with the given +options+ plus <tt>:stream => true</tt>. The
# +options+ hash passed in is not modified, so a hash that is shared with
# later non-streaming calls does not silently turn them into streaming ones.
#
# See CodeRay.scan.
def scan_stream code, lang, options = {}, &block
  scan code, lang, options.merge(:stream => true), &block
end
# Encode a string in streaming mode.
#
# Creates the Encoder for +format+ (see CodeRay.encoder) and lets it encode
# +code+ with the Scanner for +lang+ while scanning is still in progress.
# +options+ is given both to the Encoder's constructor and to its
# encode_stream call.
#
# See CodeRay::Encoder.encode_stream
def encode_stream(code, lang, format, options = {})
  stream_encoder = encoder(format, options)
  stream_encoder.encode_stream(code, lang, options)
end
# Encode a string.
#
# Creates the Encoder for +format+ (see CodeRay.encoder) and lets it scan
# +code+ with the Scanner for +lang+ and encode the result.
# +options+ is given both to the Encoder's constructor and to its encode call.
#
# See CodeRay::Encoder.encode
def encode(code, lang, format, options = {})
  format_encoder = encoder(format, options)
  format_encoder.encode(code, lang, options)
end
# Highlight a string into a HTML <div>.
#
# By default the CSS styles are referenced through classes
# (<tt>:css => :class</tt>), so a stylesheet has to be included in the
# output. Note the argument order: +options+ comes before +format+.
#
# See encode.
def highlight(code, lang, options = { :css => :class }, format = :div)
  encode(code, lang, format, options)
end
# Encode pre-scanned Tokens with the Encoder for +format+.
# +options+ is given both to the Encoder's constructor and to its
# encode_tokens call. Use this together with CodeRay.scan:
#
#  require 'coderay'
#
#  # Highlight a short Ruby code example in a HTML span
#  tokens = CodeRay.scan '1 + 2', :ruby
#  puts CodeRay.encode_tokens(tokens, :span)
#
def encode_tokens(tokens, format, options = {})
  format_encoder = encoder(format, options)
  format_encoder.encode_tokens(tokens, options)
end
# Encodes +filename+ (a path to a code file); the input language is always
# guessed (scan_file is called with :auto).
#
# The scanner receives the options found under <tt>:scanner_options</tt>
# (see get_scanner_options); the encoder receives +options+ as a whole.
#
# See CodeRay.scan_file.
# Notice that the second argument is the output +format+, not the input language.
#
# Example:
#  require 'coderay'
#  page = CodeRay.encode_file 'some_c_code.c', :html
def encode_file(filename, format, options = {})
  scanner_options = get_scanner_options(options)
  encode_tokens(scan_file(filename, :auto, scanner_options), format, options)
end
# Highlight a file into a HTML <div>.
#
# By default the CSS styles are referenced through classes
# (<tt>:css => :class</tt>), so a stylesheet has to be included in the
# output. Note the argument order: +options+ comes before +format+.
#
# See encode_file.
def highlight_file(filename, options = { :css => :class }, format = :div)
  encode_file(filename, format, options)
end
# Looks up the Encoder class registered for +format+ and returns a new
# instance of it, constructed with +options+.
#
# Example:
#  require 'coderay'
#
#  stats = CodeRay.encoder(:statistic)
#  stats.encode("puts 17 + 4\n", :ruby)
#
#  puts '%d out of %d tokens have the kind :integer.' % [
#    stats.type_stats[:integer].count,
#    stats.real_token_count
#  ]
#  #-> 2 out of 4 tokens have the kind :integer.
def encoder(format, options = {})
  encoder_class = Encoders[format]
  encoder_class.new(options)
end
# Looks up the Scanner class registered for +lang+ and returns a new
# instance of it, constructed with an empty code string and +options+.
#
# See Scanner.new.
def scanner(lang, options = {})
  scanner_class = Scanners[lang]
  scanner_class.new('', options)
end
# Returns the value stored under <tt>:scanner_options</tt> in the +options+
# hash, or a new empty Hash if that key is not present.
#
# This is used if a method like CodeRay.encode has to provide options
# for Encoder _and_ scanner.
def get_scanner_options(options)
  options.fetch(:scanner_options) { {} }
end
end
# This error is raised when you try to stream with something that is not
# capable of streaming.
#
# It derives from StandardError (not directly from Exception) so that an
# ordinary +rescue+ without a class list catches it. Code that rescues
# NotStreamableError or Exception explicitly keeps working.
class NotStreamableError < StandardError
  # +obj+ is the object that was asked to stream; only its class is reported.
  def initialize obj
    @obj = obj
  end

  # The error message, e.g. "String is not Streamable!".
  def to_s
    '%s is not Streamable!' % @obj.class
  end
end
# A dummy module that is included by subclasses of CodeRay::Scanner and CodeRay::Encoder
# to show that they are able to handle streams. It defines no methods itself.
module Streamable
end
end
# Run a test script.
# Only runs when this file is executed directly. It prompts on stderr, waits
# for a line on stdin, then scans this file's own source (from "module CodeRay"
# to the end of the file) as Ruby and prints the HTML.
if $0 == __FILE__
$stderr.print 'Press key to print demo.'; gets
code = File.read(__FILE__)[/module CodeRay.*/m]
print CodeRay.scan(code, :ruby).html
end

View File

@ -0,0 +1,87 @@
module CodeRay
# = Duo
#
# $Id: scanner.rb 123 2006-03-21 14:46:34Z murphy $
#
# A Duo is a convenient way to use CodeRay. You just create a Duo,
# giving it a lang (language of the input code) and a format (desired
# output format), and call Duo#highlight with the code.
#
# Duo makes it easy to re-use both scanner and encoder for a repetitive
# task. It also provides a very easy interface syntax:
#
# require 'coderay'
# CodeRay::Duo[:python, :div].highlight 'import this'
#
# Until you want to do uncommon things with CodeRay, I recommend to use
# this method, since it takes care of everything.
class Duo

  # The input language, the output format, and the options given at creation.
  attr_accessor :lang, :format, :options

  # Create a new Duo, holding a lang and a format to highlight code.
  #
  # simple:
  #   CodeRay::Duo[:ruby, :page].highlight 'bla 42'
  #
  # streaming:
  #   CodeRay::Duo[:ruby, :page].highlight 'bar 23', :stream => true
  #
  # with options:
  #   CodeRay::Duo[:ruby, :html, :hint => :debug].highlight '????::??'
  #
  # alternative syntax without options:
  #   CodeRay::Duo[:ruby => :statistic].encode 'class << self; end'
  #
  # alternative syntax with options:
  #   CodeRay::Duo[{ :ruby => :statistic }, :do => :something].encode 'abc'
  #
  # The options are forwarded to scanner and encoder
  # (see CodeRay.get_scanner_options).
  def initialize lang = nil, format = nil, options = {}
    if format == nil and lang.is_a? Hash and lang.size == 1
      # Duo[:ruby => :html] form: the single key is the language,
      # its value the format.
      @lang = lang.keys.first
      @format = lang[@lang]
    else
      @lang = lang
      @format = format
    end
    @options = options
  end

  class << self
    # To allow calls like Duo[:ruby, :html].highlight.
    alias [] new
  end

  # The scanner of the duo. Only created once.
  def scanner
    @scanner ||= CodeRay.scanner @lang, CodeRay.get_scanner_options(@options)
  end

  # The encoder of the duo. Only created once.
  def encoder
    @encoder ||= CodeRay.encoder @format, @options
  end

  # Tokenize and highlight the code using +scanner+ and +encoder+.
  #
  # +options+ are merged over the options given at creation; the hash passed
  # in is left untouched.
  #
  # If the :stream option is set, the Duo will go into streaming mode,
  # saving memory for the cost of time.
  def encode code, options = { :stream => false }
    # Work on a copy: :stream is removed below and must not disappear
    # from a hash the caller may want to reuse.
    options = options.dup
    stream = options.delete :stream
    options = @options.merge options
    if stream
      encoder.encode_stream(code, @lang, options)
    else
      scanner.code = code
      encoder.encode_tokens(scanner.tokenize, options)
    end
  end
  alias highlight encode

end
end

View File

@ -0,0 +1,177 @@
require "stringio"
module CodeRay
# This module holds the Encoder class and its subclasses.
# For example, the HTML encoder is named CodeRay::Encoders::HTML
# can be found in coderay/encoders/html.
#
# Encoders also provides methods and constants for the register
# mechanism and the [] method that returns the Encoder class
# belonging to the given format.
module Encoders
extend PluginHost
plugin_path File.dirname(__FILE__), 'encoders'
# = Encoder
#
# The Encoder base class. Together with Scanner and
# Tokens, it forms the highlighting triad.
#
# Encoder instances take a Tokens object and do something with it.
#
# The most common Encoder is surely the HTML encoder
# (CodeRay::Encoders::HTML). It highlights the code in a colorful
# html page.
# If you want the highlighted code in a div or a span instead,
# use its subclasses Div and Span.
class Encoder
  extend Plugin
  plugin_host Encoders

  # The value returned by CodeRay.scan_stream during the last encode_stream.
  attr_reader :token_stream

  class << self

    # Returns whether instances of this Encoder class can be used in
    # streaming mode, i.e. whether the class includes Streamable.
    def streamable?
      # Streamable is mixed in with +include+, so it shows up in the class's
      # ancestors; the class object itself is never a Streamable.
      include? Streamable
    end

    # If FILE_EXTENSION isn't defined, the plugin_id of the class is used
    # instead. Every other missing constant is handled as usual.
    def const_missing sym
      if sym == :FILE_EXTENSION
        plugin_id
      else
        super
      end
    end

  end

  # Subclasses are to store their default options in this constant.
  DEFAULT_OPTIONS = { :stream => false }

  # The options you gave the Encoder at creating.
  attr_accessor :options

  # Creates a new Encoder.
  # +options+ is saved and used for all encode operations, as long
  # as you don't overwrite it there by passing additional options.
  #
  # Encoder objects provide three encode methods:
  # - encode simply takes a +code+ string and a +lang+
  # - encode_tokens expects a +tokens+ object instead
  # - encode_stream is like encode, but uses streaming mode.
  #
  # Each method has an optional +options+ parameter. These are
  # added to the options you passed at creation.
  #
  # Raises a RuntimeError when called on the Encoder base class itself.
  def initialize options = {}
    @options = self.class::DEFAULT_OPTIONS.merge options
    raise "I am only the basic Encoder class. I can't encode "\
      "anything. :( Use my subclasses." if self.class == Encoder
  end

  # Encode a Tokens object: runs setup, compile and finish with the merged
  # options and returns the result of finish.
  def encode_tokens tokens, options = {}
    options = @options.merge options
    setup options
    compile tokens, options
    finish options
  end

  # Encode the given +code+ after tokenizing it using the Scanner
  # for +lang+. The scanner gets the options stored under
  # <tt>:scanner_options</tt> (see CodeRay.get_scanner_options).
  def encode code, lang, options = {}
    options = @options.merge options
    scanner_options = CodeRay.get_scanner_options(options)
    tokens = CodeRay.scan code, lang, scanner_options
    encode_tokens tokens, options
  end

  # You can use highlight instead of encode, if that seems
  # more clear to you.
  alias highlight encode

  # Encode the given +code+ using the Scanner for +lang+ in
  # streaming mode. The encoder itself is passed to the scanner as a block
  # (see to_proc), so each token is encoded as soon as it is scanned.
  #
  # Raises NotStreamableError unless the encoder is Streamable.
  def encode_stream code, lang, options = {}
    raise NotStreamableError, self unless kind_of? Streamable
    options = @options.merge options
    setup options
    scanner_options = CodeRay.get_scanner_options options
    @token_stream =
      CodeRay.scan_stream code, lang, scanner_options, &self
    finish options
  end

  # Behave like a proc. The token method is converted to a proc.
  def to_proc
    method(:token).to_proc
  end

  # Return the default file extension for outputs of this encoder.
  def file_extension
    self.class::FILE_EXTENSION
  end

protected

  # Called with merged options before encoding starts.
  # Sets @out to an empty string.
  #
  # See the HTML Encoder for an example of option caching.
  def setup options
    @out = ''
  end

  # Called with +text+ and +kind+ of the currently scanned token.
  # For simple encoders, it's enough to implement this method.
  #
  # By default, it calls text_token or block_token, depending on
  # whether +text+ is a String or a Symbol, and appends the result
  # to @out (if @out is set). Anything else raises a RuntimeError.
  def token text, kind
    out =
      if text.is_a? ::String  # Ruby 1.9: :open.is_a? String
        text_token text, kind
      elsif text.is_a? ::Symbol
        block_token text, kind
      else
        raise 'Unknown token text type: %p' % text
      end
    @out << out if @out
  end

  # Hook for subclasses: returns the output for a text token.
  # The base implementation returns nil.
  def text_token text, kind
  end

  # Dispatches the block actions :open and :close to open_token and
  # close_token, which subclasses have to provide. Any other action raises
  # a RuntimeError.
  def block_token action, kind
    case action
    when :open
      open_token kind
    when :close
      close_token kind
    else
      raise 'unknown block action: %p' % action
    end
  end

  # Called with merged options after encoding is done.
  # The return value is the result of encoding, typically @out.
  def finish options
    @out
  end

  # Do the encoding.
  #
  # The already created +tokens+ object must be used; it can be a
  # TokenStream or a Tokens object. Every element is handed to token
  # (via to_proc).
  def compile tokens, options
    tokens.each(&self)
  end

end
end
end

View File

@ -0,0 +1,9 @@
module CodeRay
module Encoders
# Presumably registers alternative encoder names (each key is an alias for
# the encoder named by its value); +map+ is not defined in this file --
# TODO confirm against PluginHost.
map :stats => :statistic,
:plain => :text,
:tex => :latex
end
end

View File

@ -0,0 +1,21 @@
module CodeRay
module Encoders
# Encoder whose result is the number of tokens it was given.
class Count < Encoder

  include Streamable
  register_for :count

protected

  # Starts each run with a counter of zero in place of an output string.
  def setup(options)
    @out = 0
  end

  # Counts one token; +text+ and +kind+ are ignored.
  def token(text, kind)
    @out = @out + 1
  end

end
end

View File

@ -0,0 +1,41 @@
module CodeRay
module Encoders
# = Debug Encoder
#
# Fast encoder producing simple debug output.
#
# The output is readable and diff-able, which makes it useful for testing.
# Text tokens are written as <tt>kind(text)</tt>, opened blocks as
# <tt>kind<</tt> and closed blocks as <tt>></tt>; :space tokens are written
# verbatim.
#
# You cannot fully restore the tokens information from the
# output, because consecutive :space tokens are merged.
# Use Tokens#dump for caching purposes.
class Debug < Encoder

  include Streamable
  register_for :debug

  FILE_EXTENSION = 'raydebug'

protected

  # Returns whitespace unchanged and every other token as "kind(text)",
  # with ")" and "\" inside the text escaped by a backslash.
  def text_token(text, kind)
    return text if kind == :space
    escaped = text.gsub(/[)\\]/, '\\\\\0')  # escape ) and \
    "#{kind}(#{escaped})"
  end

  # Marks the start of a token group of the given +kind+.
  def open_token(kind)
    "#{kind}<"
  end

  # Marks the end of the innermost token group; +kind+ is not written.
  def close_token(kind)
    '>'
  end

end
end
end

View File

@ -0,0 +1,20 @@
module CodeRay
module Encoders
load :html
# HTML encoder variant whose defaults differ from HTML's in two options:
# <tt>:css => :style</tt> and <tt>:wrap => :div</tt>.
class Div < HTML

  FILE_EXTENSION = 'div.html'

  register_for :div

  DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge(:css => :style, :wrap => :div)

end
end
end

View File

@ -0,0 +1,262 @@
require "set"
module CodeRay
module Encoders
# = HTML Encoder
#
# This is CodeRay's most important highlighter:
# It provides safe, fast XHTML generation and CSS support.
#
# == Usage
#
# require 'coderay'
# puts CodeRay.scan('Some /code/', :ruby).html #-> a HTML page
# puts CodeRay.scan('Some /code/', :ruby).html(:wrap => :span)
# #-> <span class="CodeRay"><span class="co">Some</span> /code/</span>
# puts CodeRay.scan('Some /code/', :ruby).span #-> the same
#
# puts CodeRay.scan('Some code', :ruby).html(
# :wrap => nil,
# :line_numbers => :inline,
# :css => :style
# )
# #-> <span class="no">1</span> <span style="color:#036; font-weight:bold;">Some</span> code
#
# == Options
#
# === :tab_width
# Convert \t characters to +n+ spaces (a number.)
# Default: 8
#
# === :css
# How to include the styles; can be :class or :style.
#
# Default: :class
#
# === :wrap
# Wrap in :page, :div, :span or nil.
#
# You can also use Encoders::Div and Encoders::Span.
#
# Default: nil
#
# === :line_numbers
# Include line numbers in :table, :inline, :list or nil (no line numbers)
#
# Default: nil
#
# === :line_number_start
# Where to start with line number counting.
#
# Default: 1
#
# === :bold_every
# Make every +n+-th number appear bold.
#
# Default: 10
#
# === :hint
# Include some information into the output using the title attribute.
# Can be :info (show token type on mouse-over), :info_long (with full path)
# or :debug (via inspect).
#
# Default: false
class HTML < Encoder
include Streamable
register_for :html
FILE_EXTENSION = 'html'
# Default value for every option of this encoder; see the option list in the
# class documentation.
DEFAULT_OPTIONS = {
:tab_width => 8,
:level => :xhtml,
:css => :class,
:style => :cycnus,
:wrap => nil,
:line_numbers => nil,
:line_number_start => 1,
:bold_every => 10,
:hint => false,
}
helper :output, :css
attr_reader :css
protected
# Maps a single character to the text that replaces it in the output.
HTML_ESCAPE = { #:nodoc:
'&' => '&amp;',
'"' => '&quot;',
'>' => '&gt;',
'<' => '&lt;',
}
# This was to prevent illegal HTML.
# Strange chars should still be avoided in codes.
# Each remaining control character is mapped to a single space.
# NOTE(review): ?\n etc. are Integers only on Ruby 1.8; on later versions they
# are Strings, so the subtraction removes nothing -- confirm whether that matters.
evil_chars = Array(0x00...0x20) - [?\n, ?\t, ?\s]
evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' }
#ansi_chars = Array(0x7f..0xff)
#ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i }
# \x9 (\t) and \xA (\n) not included
#HTML_ESCAPE_PATTERN = /[\t&"><\0-\x8\xB-\x1f\x7f-\xff]/
# Matches every character that has to be replaced in token text: tab, the four
# HTML special characters, and the control characters \0-\x8 and \xB-\x1f.
HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1f]/
# Human-readable name for a token kind, computed on first access and cached:
# underscores become spaces and every word is capitalized (:pre_constant is
# special-cased).
TOKEN_KIND_TO_INFO = Hash.new { |h, kind|
h[kind] =
case kind
when :pre_constant
'Predefined constant'
else
kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize }
end
}
# Token kinds that setup drops from the front of the kind path in :style mode
# before the hint title is built.
TRANSPARENT_TOKEN_KINDS = [
:delimiter, :modifier, :content, :escape, :inline_delimiter,
].to_set
# Generate a hint about the given +classes+ in a +hint+ style.
#
# +hint+ may be :info, :info_long or :debug.
#
# Returns a string of the form <tt> title="..."</tt> (with a leading space)
# that can be inserted into a tag, or an empty string for any other +hint+
# value (such as false or nil), so that no empty title attribute is emitted.
def self.token_path_to_hint hint, classes
  title =
    case hint
    when :info
      TOKEN_KIND_TO_INFO[classes.first]
    when :info_long
      classes.reverse.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/')
    when :debug
      classes.inspect
    end
  title ? " title=\"#{title}\"" : ''
end
# Prepares the encoder for a run with the merged +options+: resets the output
# (via super), builds the per-run escape table and creates @css_style, a Hash
# that lazily computes and caches the opening <span ...> tag (or false for
# "no tag") for a path of token kinds.
#
# Raises ArgumentError for an unknown :hint or :css value.
def setup options
super
# Per-run copy of the escape table, so that the tab replacement can depend
# on the :tab_width option.
@HTML_ESCAPE = HTML_ESCAPE.dup
@HTML_ESCAPE["\t"] = ' ' * options[:tab_width]
# @opened[0] is a slot for the kind of the current token (set by token);
# the kinds of the currently open token groups follow it.
@opened = [nil]
@css = CSS.new options[:style]
hint = options[:hint]
if hint and not [:debug, :info, :info_long].include? hint
raise ArgumentError, "Unknown value %p for :hint; \
expected :info, :debug, false, or nil." % hint
end
case options[:css]
when :class
# Styles are referenced through CSS classes. The key k is the kind path
# (token looks entries up with @opened itself).
@css_style = Hash.new do |h, k|
c = Tokens::ClassOfKind[k.first]
if c == :NO_HIGHLIGHT and not hint
h[k.dup] = false
else
title = if hint
# The current kind is moved from the front to the end of the path.
HTML.token_path_to_hint(hint, k[1..-1] << k.first)
else
''
end
# Results are stored under a copy of the key, because the array used for
# the lookup is modified afterwards.
if c == :NO_HIGHLIGHT
h[k.dup] = '<span%s>' % [title]
else
h[k.dup] = '<span%s class="%s">' % [title, c]
end
end
end
when :style
# Styles are written inline into a style attribute, looked up in @css.
@css_style = Hash.new do |h, k|
if k.is_a? ::Array
styles = k.dup
else
styles = [k]
end
type = styles.first
classes = styles.map { |c| Tokens::ClassOfKind[c] }
if classes.first == :NO_HIGHLIGHT and not hint
h[k] = false
else
styles.shift if TRANSPARENT_TOKEN_KINDS.include? styles.first
# NOTE(review): unlike the :class branch, this is called even when hint
# is false or nil.
title = HTML.token_path_to_hint hint, styles
style = @css[*classes]
# NOTE(review): stored under k itself, not a copy as in the :class branch.
h[k] =
if style
'<span%s style="%s">' % [title, style]
else
false
end
end
end
else
raise ArgumentError, "Unknown value %p for :css." % options[:css]
end
end
# Completes the output after all tokens have been encoded: closes spans that
# are still open (with a warning), turns @out into an Output string, applies
# line numbers and wrapping as requested by +options+, and returns the
# result of the inherited finish.
def finish(options)
  @opened.shift  # drop the slot that held the current token kind
  @out << '</span>' * @opened.size
  warn '%d tokens still open: %p' % [@opened.size, @opened] unless @opened.empty?

  @out.extend Output
  @out.css = @css
  @out.numerize!(options[:line_numbers], options)
  @out.wrap!(options[:wrap])

  super
end
# Encodes one token and appends the result to @out.
#
# +text+ is either a String (a text token of kind +type+) or one of the
# Symbols :open and :close, which start or end a token group of kind +type+.
# Anything else raises a RuntimeError.
def token text, type
if text.is_a? ::String
# Escape only if there is something to escape.
if text =~ /#{HTML_ESCAPE_PATTERN}/o
text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] }
end
# Slot 0 holds the kind of the current token; together with the open groups
# behind it, it forms the key for the style lookup.
@opened[0] = type
if style = @css_style[@opened]
@out << style << text << '</span>'
else
@out << text
end
else
case text
when :open
@opened[0] = type
# A group always gets a span, so that :close can always emit </span>.
@out << (@css_style[@opened] || '<span>')
@opened << type
when :close
if @opened.empty?
# nothing to close
else
# The consistency check is only done in debug mode.
if $DEBUG and (@opened.size == 1 or @opened.last != type)
raise 'Malformed token stream: Trying to close a token (%p) \
that is not open. Open are: %p.' % [type, @opened[1..-1]]
end
@out << '</span>'
@opened.pop
end
when nil
raise 'Token with nil as text was given: %p' % [[text, type]]
else
raise 'unknown token kind: %p' % text
end
end
end
end
end
end

View File

@ -0,0 +1,65 @@
module CodeRay
module Encoders
class HTML
# Holds the stylesheet of a CodeRay style and answers which inline style
# belongs to a CSS class (optionally nested inside other classes).
class CSS

  # The complete stylesheet as a String: the style's main styles followed by
  # its token colors, each non-empty line prefixed with ".CodeRay ".
  attr :stylesheet

  # Returns the style module registered under +style+ in CodeRay::Styles.
  def CSS.load_stylesheet style = nil
    CodeRay::Styles[style]
  end

  # Loads the given +style+, builds the stylesheet and parses the token
  # colors for later lookup with [].
  def initialize style = :default
    @classes = Hash.new
    style = CSS.load_stylesheet style
    @stylesheet = [
      style::CSS_MAIN_STYLES,
      style::TOKEN_COLORS.gsub(/^(?!$)/, '.CodeRay ')
    ].join("\n")
    parse style::TOKEN_COLORS
  end

  # Returns the style for the class <tt>styles.first</tt>, preferring the
  # rule whose parent classes match the remaining +styles+ most closely.
  #
  # Returns '' if the class is unknown, and nil if the class is known but no
  # rule matches. In debug mode ($DEBUG), a missing or empty style raises
  # a RuntimeError instead.
  def [] *styles
    cl = @classes[styles.first]
    return '' unless cl
    style = ''
    # Try ever shorter lists of parent classes, ending with none at all.
    1.upto(styles.size) do |offset|
      break if style = cl[styles[offset .. -1]]
    end
    # style is nil when nothing matched, hence the to_s.
    raise 'Style not found: %p' % [styles] if $DEBUG and style.to_s.empty?
    return style
  end

private

  CSS_CLASS_PATTERN = /
    ( (?:                # $1 = classes
      \s* \. [-\w]+
    )+ )
    \s* \{ \s*
    ( [^\}]+ )?          # $2 = style
    \s* \} \s*
  |
    ( . )                # $3 = error
  /mx

  # Fills @classes from +stylesheet+: for a rule ".a .b { style }" the
  # style is stored as @classes['b'][['a']]. Raises a RuntimeError on any
  # character that is not part of such a rule.
  def parse stylesheet
    stylesheet.scan CSS_CLASS_PATTERN do |classes, style, error|
      raise "CSS parse error: '#{error.inspect}' not recognized" if error
      styles = classes.scan(/[-\w]+/)
      cl = styles.pop
      @classes[cl] ||= Hash.new
      @classes[cl][styles] = style.to_s.strip
    end
  end

end
end
end
end
# When this file is executed directly, pretty-print a CSS object created
# with the default style.
if $0 == __FILE__
require 'pp'
pp CodeRay::Encoders::HTML::CSS.new
end

View File

@ -0,0 +1,122 @@
module CodeRay
module Encoders
class HTML
module Output
# Non-destructive variant of numerize!: numbers a clone of the receiver
# and returns whatever numerize! returns for that clone.
def numerize(*args)
  copy = clone
  copy.numerize!(*args)
end
=begin NUMERIZABLE_WRAPPINGS = {
:table => [:div, :page, nil],
:inline => :all,
:list => [:div, :page, nil]
}
NUMERIZABLE_WRAPPINGS.default = :all
=end
# Adds line numbers to the receiver (a String of HTML) in place and returns
# the receiver.
#
# +mode+ selects the layout: :inline, :table or :list; nil or false leaves
# the receiver unchanged. +options+ are merged over DEFAULT_OPTIONS; used are
# <tt>:line_number_start</tt> (an Integer) and <tt>:bold_every</tt>
# (false or a non-zero Integer).
#
# Raises ArgumentError for invalid option values or an unknown +mode+.
def numerize! mode = :table, options = {}
  return self unless mode

  options = DEFAULT_OPTIONS.merge options

  start = options[:line_number_start]
  unless start.is_a? Integer
    raise ArgumentError, "Invalid value %p for :line_number_start; Integer expected." % start
  end

  #allowed_wrappings = NUMERIZABLE_WRAPPINGS[mode]
  #unless allowed_wrappings == :all or allowed_wrappings.include? options[:wrap]
  #  raise ArgumentError, "Can't numerize, :wrap must be in %p, but is %p" % [NUMERIZABLE_WRAPPINGS, options[:wrap]]
  #end

  # bolding turns a line number into its HTML representation.
  bold_every = options[:bold_every]
  bolding =
    if bold_every == false
      proc { |line| line.to_s }
    elsif bold_every.is_a? Integer
      raise ArgumentError, ":bold_every can't be 0." if bold_every == 0
      proc do |line|
        if line % bold_every == 0
          "<strong>#{line}</strong>"  # every bold_every-th number in bold
        else
          line.to_s
        end
      end
    else
      raise ArgumentError, 'Invalid value %p for :bold_every; false or Integer expected.' % bold_every
    end

  case mode
  when :inline
    # Prefix every line with its right-aligned number.
    max_width = (start + line_count).to_s.size
    line = start
    gsub!(/^/) do
      line_number = bolding.call line
      indent = ' ' * (max_width - line.to_s.size)
      res = "<span class=\"no\">#{indent}#{line_number}</span> "
      line += 1
      res
    end

  when :table
    # This is really ugly.
    # Because even monospace fonts seem to have different heights when bold,
    # I make the newline bold, both in the code and the line numbers.
    # FIXME Still not working perfect for Mr. Internet Exploder
    # FIXME Firefox struggles with very long codes (> 200 lines)
    line_numbers = (start ... start + line_count).to_a.map(&bolding).join("\n")
    line_numbers << "\n"  # also for Mr. MS Internet Exploder :-/
    line_numbers.gsub!(/\n/) { "<tt>\n</tt>" }

    line_numbers_table_tpl = TABLE.apply('LINE_NUMBERS', line_numbers)
    gsub!(/\n/) { "<tt>\n</tt>" }
    wrap_in! line_numbers_table_tpl
    @wrapped_in = :div

  when :list
    # Every line becomes a list item. Spans that stay open across a line
    # break are closed at the end of the item and reopened in the next one.
    opened_tags = []
    gsub!(/^.*$\n?/) do |line|
      line.chomp!

      open = opened_tags.join
      line.scan(%r!<(/)?span[^>]*>?!) do |close,|
        if close
          opened_tags.pop
        else
          opened_tags << $&
        end
      end
      close = '</span>' * opened_tags.size

      "<li>#{open}#{line}#{close}</li>"
    end

    wrap_in! LIST
    @wrapped_in = :div

  else
    raise ArgumentError, 'Unknown value %p for mode: expected one of %p' %
      [mode, [:table, :list, :inline]]
  end

  self
end
# Returns the number of lines of the receiver: the number of newlines, plus
# one if anything other than closing span tags follows the last newline.
# A receiver without any newline counts as 0 lines.
def line_count
  newlines = count("\n")
  last_newline = rindex(?\n)
  return newlines unless last_newline

  tail = self[last_newline + 1 .. -1]
  # Nothing but "</span>" tags after the last newline: no additional line.
  tail[/\A(?:<\/span>)*\z/] ? newlines : newlines + 1
end
end
end
end
end

View File

@ -0,0 +1,195 @@
module CodeRay
module Encoders
class HTML
# This module is included in the output String from the HTML Encoder.
#
# It provides methods like wrap, div, page etc.
#
# Remember to use #clone instead of #dup to keep the modules the object was
# extended with.
#
# TODO: more doc.
module Output
require 'coderay/encoders/html/numerization.rb'
attr_accessor :css
class << self
  # This makes Output look like a class.
  #
  # Clones +string+, extends the clone with this module and stores +element+
  # (as wrapped_in) and +css+ on it. The string passed in is not modified.
  #
  # Example:
  #
  #  a = Output.new '<span class="co">Code</span>'
  #  a.wrap! :page
  def new string, css = CSS.new, element = nil
    output = string.clone.extend self
    output.wrapped_in = element
    output.css = css
    output
  end
  # Hook that runs whenever an object is extended with Output.
  #
  # Emits a warning (it does not raise) if the object doesn't respond to
  # to_str, to prevent users from misuse. Use Module#remove_method to disable.
  def extended o
    warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str
  end
  # Returns the stylesheet of +css+. If +in_tag+ is true, the stylesheet is
  # enclosed in a <style type="text/css"> element.
  def make_stylesheet css, in_tag = false
    sheet = css.stylesheet
    sheet = <<-CSS if in_tag
<style type="text/css">
#{sheet}
</style>
    CSS
    sheet
  end
  # Returns the PAGE template with its CSS placeholder replaced by the
  # stylesheet of +css+.
  def page_template_for_css css
    sheet = make_stylesheet css
    PAGE.apply 'CSS', sheet
  end
  # Define a new wrapper. This is meta programming.
  #
  # For every given name two instance methods are defined: +name+, which
  # forwards to wrap, and +name!+, which forwards to wrap!. In both cases the
  # name is passed as the first argument.
  def wrapper *wrappers
    wrappers.each do |wrapper|
      define_method wrapper do |*args|
        wrap wrapper, *args
      end
      define_method "#{wrapper}!".to_sym do |*args|
        wrap! wrapper, *args
      end
    end
  end
end
wrapper :div, :span, :page
def wrapped_in? element
wrapped_in == element
end
def wrapped_in
@wrapped_in ||= nil
end
attr_writer :wrapped_in
def wrap_in template
clone.wrap_in! template
end
def wrap_in! template
Template.wrap! self, template, 'CONTENT'
self
end
# Wraps the string in place in the template that belongs to +element+
# (:div, :span or :page) and records the new wrapper.
#
# Returns self. Nothing happens if +element+ is nil/false or equals the
# current wrapper. Raises a RuntimeError for unknown elements and for
# combinations that cannot be nested (:div and :span need an unwrapped
# string; :page needs an unwrapped or :div-wrapped string).
def wrap! element, *args
  return self unless element
  return self if element == wrapped_in

  case element
  when :div, :span
    unless wrapped_in? nil
      raise "Can't wrap %p in %p" % [wrapped_in, element]
    end
    wrap_in!(element == :div ? DIV : SPAN)
  when :page
    # A page always contains a div, so add that layer first if it is missing.
    wrap! :div if wrapped_in? nil
    unless wrapped_in? :div
      raise "Can't wrap %p in %p" % [wrapped_in, element]
    end
    wrap_in! Output.page_template_for_css(@css)
  else
    raise "Unknown value %p for :wrap" % element
  end

  @wrapped_in = element
  self
end
def wrap *args
clone.wrap!(*args)
end
def stylesheet in_tag = false
Output.make_stylesheet @css, in_tag
end
# A String containing placeholders of the form <%NAME%>.
class Template < String
  # Wraps +str+ in place into +template+: everything in front of the
  # <%target%> placeholder is prepended to +str+, everything behind it is
  # appended. Only the first occurrence of the placeholder is used.
  #
  # Returns +str+. Raises a RuntimeError if the placeholder is missing.
  def self.wrap! str, template, target
    placeholder = "<%#{target}%>"
    at = template.index(placeholder)
    raise "Template target #{placeholder} not found" unless at
    str[0, 0] = template[0, at]
    str << template[(at + placeholder.size)..-1]
  end
  # Returns a new Template in which the first <%target%> placeholder is
  # replaced by +replacement+.
  #
  # Raises a RuntimeError if the placeholder is missing.
  def apply target, replacement
    placeholder = "<%#{target}%>"
    at = index(placeholder)
    raise "Template target #{placeholder} not found" unless at
    Template.new(self[0, at] + replacement + self[(at + placeholder.size)..-1])
  end
  # Redefines the backtick operator so that `...` and <<-`X` heredocs
  # evaluated in the extending scope build Templates instead of running
  # shell commands.
  module Simple
    def ` str #` <-- for stupid editors
      Template.new str
    end
  end
end
extend Template::Simple
#-- don't include the templates in docu
SPAN = `<span class="CodeRay"><%CONTENT%></span>`
DIV = <<-`DIV`
<div class="CodeRay">
<div class="code"><pre><%CONTENT%></pre></div>
</div>
DIV
TABLE = <<-`TABLE`
<table class="CodeRay"><tr>
<td class="line_numbers" title="click to toggle" onclick="with (this.firstChild.style) { display = (display == '') ? 'none' : '' }"><pre><%LINE_NUMBERS%></pre></td>
<td class="code"><pre ondblclick="with (this.style) { overflow = (overflow == 'auto' || overflow == '') ? 'visible' : 'auto' }"><%CONTENT%></pre></td>
</tr></table>
TABLE
# title="double click to expand"
LIST = <<-`LIST`
<ol class="CodeRay"><%CONTENT%></ol>
LIST
PAGE = <<-`PAGE`
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="de">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>CodeRay HTML Encoder Example</title>
<style type="text/css">
<%CSS%>
</style>
</head>
<body style="background-color: white;">
<%CONTENT%>
</body>
</html>
PAGE
end
end
end
end

View File

@ -0,0 +1,26 @@
module CodeRay
  module Encoders
    # = Null Encoder
    #
    # Does nothing and returns an empty string.
    #
    # Registered under the id :null.
    class Null < Encoder
      include Streamable
      register_for :null
      # Defined for faster processing: returns a proc with an empty body.
      def to_proc
        proc {}
      end
      protected
      # Accepts any arguments and discards them.
      def token(*)
        # do nothing
      end
    end
  end
end

View File

@ -0,0 +1,21 @@
module CodeRay
  module Encoders
    # NOTE(review): presumably loads the :html encoder plugin so that the
    # HTML superclass below is defined — confirm against Encoders.load.
    load :html
    # HTML encoder preset registered under the id :page.
    class Page < HTML
      FILE_EXTENSION = 'html'
      register_for :page
      # HTML::DEFAULT_OPTIONS with :css, :wrap and :line_numbers overridden.
      DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
        :css => :class,
        :wrap => :page,
        :line_numbers => :table
      })
    end
  end
end

View File

@ -0,0 +1,20 @@
module CodeRay
  module Encoders
    # NOTE(review): presumably loads the :html encoder plugin so that the
    # HTML superclass below is defined — confirm against Encoders.load.
    load :html
    # HTML encoder preset registered under the id :span.
    class Span < HTML
      FILE_EXTENSION = 'span.html'
      register_for :span
      # HTML::DEFAULT_OPTIONS with :css and :wrap overridden.
      DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
        :css => :style,
        :wrap => :span,
      })
    end
  end
end

View File

@ -0,0 +1,77 @@
module CodeRay
module Encoders
# Makes a statistic for the given tokens.
class Statistic < Encoder
include Streamable
register_for :stats, :statistic
attr_reader :type_stats, :real_token_count
protected
TypeStats = Struct.new :count, :size
def setup options
@type_stats = Hash.new { |h, k| h[k] = TypeStats.new 0, 0 }
@real_token_count = 0
end
def generate tokens, options
@tokens = tokens
super
end
# Records one text token: bumps the non-whitespace counter (unless +kind+
# is :space) and adds the token to both the per-kind and the 'TOTAL'
# statistics.
def text_token text, kind
  length = text.size
  @real_token_count += 1 if kind != :space

  # Look up the per-kind entry first so that it is created before 'TOTAL'.
  kind_stats = @type_stats[kind]
  kind_stats.count += 1
  kind_stats.size += length

  total = @type_stats['TOTAL']
  total.size += length
  total.count += 1
end
# TODO Hierarchy handling
def block_token action, kind
@type_stats['TOTAL'].count += 1
@type_stats['open/close'].count += 1
end
STATS = <<-STATS
Code Statistics
Tokens %8d
Non-Whitespace %8d
Bytes Total %8d
Token Types (%d):
type count ratio size (average)
-------------------------------------------------------------
%s
STATS
# space 12007 33.81 % 1.7
TOKEN_TYPES_ROW = <<-TKR
%-20s %8d %6.2f %% %5.1f
TKR
# Builds the statistics report by filling the STATS format string.
#
# NOTE: this method mutates @type_stats. Every entry's size is replaced by
# its average (size / count), and all entries with String keys are deleted
# at the end.
def finish options
  all = @type_stats['TOTAL']
  # Remember the totals now; all.size is turned into an average below.
  all_count, all_size = all.count, all.size
  @type_stats.each do |type, stat|
    stat.size /= stat.count.to_f
  end
  # One row per entry (the String-keyed ones are still present here),
  # ordered by descending count, then by name.
  types_stats = @type_stats.sort_by { |k, v| [-v.count, k.to_s] }.map do |k, v|
    TOKEN_TYPES_ROW % [k, v.count, 100.0 * v.count / all_count, v.size]
  end.join
  STATS % [
    all_count, @real_token_count, all_size,
    # Number of token types: the String-keyed entries are removed first.
    @type_stats.delete_if { |k, v| k.is_a? String }.size,
    types_stats
  ]
end
end
end
end

View File

@ -0,0 +1,32 @@
module CodeRay
module Encoders
class Text < Encoder
include Streamable
register_for :text
FILE_EXTENSION = 'txt'
DEFAULT_OPTIONS = {
:separator => ''
}
protected
def setup options
@out = ''
@sep = options[:separator]
end
# Appends +text+ followed by the separator to the output.
# Tokens whose text is not a String are ignored.
def token text, kind
  return unless ::String === text
  chunk = text + @sep
  @out << chunk
end
# Returns the collected text without the separator that trails the last
# token.
#
# With an empty (or nil) separator the text is returned unchanged:
# String#chomp('') would otherwise strip every trailing newline of the
# encoded text.
def finish options
  return @out.dup if @sep.nil? || @sep.empty?
  @out.chomp @sep
end
end
end
end

View File

@ -0,0 +1,44 @@
module CodeRay
module Encoders
# The Tokens encoder converts the tokens to a simple
# readable format. It doesn't use colors and is mainly
# intended for console output.
#
# The tokens are converted with Tokens.write_token.
#
# The format is:
#
# <token-kind> \t <escaped token-text> \n
#
# Example:
#
# require 'coderay'
# puts CodeRay.scan("puts 3 + 4", :ruby).tokens
#
# prints:
#
# ident puts
# space
# integer 3
# space
# operator +
# space
# integer 4
#
class Tokens < Encoder
  include Streamable
  register_for :tokens
  FILE_EXTENSION = 'tok'
  protected
  # Appends the result of CodeRay::Tokens.write_token for this token to @out.
  def token text, kind
    @out << CodeRay::Tokens.write_token(text, kind)
  end
end
end
end

View File

@ -0,0 +1,70 @@
module CodeRay
module Encoders
# = XML Encoder
#
# Uses REXML. Very slow.
class XML < Encoder
include Streamable
register_for :xml
FILE_EXTENSION = 'xml'
require 'rexml/document'
DEFAULT_OPTIONS = {
:tab_width => 8,
:pretty => -1,
:transitive => false,
}
protected
def setup options
@doc = REXML::Document.new
@doc << REXML::XMLDecl.new
@tab_width = options[:tab_width]
@root = @node = @doc.add_element('coderay-tokens')
end
def finish options
@doc.write @out, options[:pretty], options[:transitive], true
@out
end
# Adds +text+ to the XML tree.
#
# :space tokens are written directly into the current node; every other
# kind gets a child element named after the kind. The text is split into
# runs of spaces, runs of tabs, single newlines and everything else; the
# whitespace pieces are added as whitespace-respecting text nodes.
def text_token text, kind
  target = kind == :space ? @node : @node.add_element(kind.to_s)
  text.scan(/(\x20+)|(\t+)|(\n)|[^\x20\t\n]+/) do |space, tab, nl|
    whitespace = space || tab || nl
    target << (whitespace ? REXML::Text.new(whitespace, true) : REXML::Text.new($&))
  end
end
def open_token kind
@node = @node.add_element kind.to_s
end
def close_token kind
if @node == @root
raise 'no token to close!'
end
@node = @node.parent
end
end
end
end

View File

@ -0,0 +1,22 @@
module CodeRay
  module Encoders
    # = YAML Encoder
    #
    # Slow.
    #
    # Registered under the id :yaml.
    class YAML < Encoder
      register_for :yaml
      FILE_EXTENSION = 'yaml'
      protected
      # Converts +tokens+ to an Array and stores its YAML dump in @out.
      # The yaml library is only required when this method runs.
      def compile tokens, options
        require 'yaml'
        @out = tokens.to_a.to_yaml
      end
    end
  end
end

View File

@ -0,0 +1,189 @@
module CodeRay
  # = FileType
  #
  # A simple filetype recognizer.
  #
  # Copyright (c) 2006 by murphy (Kornelius Kalnbach) <murphy rubychan de>
  #
  # License:: LGPL / ask the author
  # Version:: 0.1 (2005-09-01)
  #
  # == Documentation
  #
  #  # determine the type of the given file
  #  lang = FileType[ARGV.first]
  #
  #  # return :plaintext if the file type is unknown
  #  lang = FileType.fetch ARGV.first, :plaintext
  #
  #  # try the shebang line, too
  #  lang = FileType.fetch ARGV.first, :plaintext, true
  module FileType
    UnknownFileType = Class.new Exception
    class << self
      # Try to determine the file type of the file.
      #
      # +filename+ is a relative or absolute path to a file.
      #
      # The extension is looked up first (as given, then downcased), then the
      # base name (as given, then downcased). Returns a Symbol, or nil if the
      # type is unknown.
      #
      # The file itself is only accessed when +read_shebang+ is set to true.
      # That means you can get filetypes from files that don't exist.
      def [] filename, read_shebang = false
        name = File.basename filename
        ext = File.extname(name).sub(/^\./, '')  # delete the leading dot
        type =
          TypeFromExt[ext] ||
          TypeFromExt[ext.downcase] ||
          TypeFromName[name] ||
          TypeFromName[name.downcase]
        type ||= shebang(filename) if read_shebang
        type
      end
      # Reads the first line of +filename+ and returns the interpreter name
      # found in it (see TypeFromShebang) as a Symbol.
      #
      # Returns nil if the file does not exist, cannot be read, is empty, or
      # if its first line names no known interpreter.
      def shebang filename
        return unless File.exist? filename
        # Binary mode: the first line of an arbitrary file need not be valid
        # text in the default encoding.
        File.open filename, 'rb' do |f|
          first_line = f.gets
          interpreter = first_line && first_line[TypeFromShebang]
          interpreter && interpreter.to_sym
        end
      rescue IOError, SystemCallError
        nil
      end
      # This works like Hash#fetch.
      #
      # If the filetype cannot be found, the result of the block is returned
      # if one is given; otherwise the +default+ value is returned if it is
      # set. With neither, UnknownFileType is raised.
      def fetch filename, default = nil, read_shebang = false
        if default and block_given?
          warn 'block supersedes default value argument'
        end
        unless type = self[filename, read_shebang]
          return yield if block_given?
          return default if default
          raise UnknownFileType, 'Could not determine type of %p.' % filename
        end
        type
      end
    end
    # File extension (without the dot) => type.
    TypeFromExt = {
      'rb' => :ruby,
      'rbw' => :ruby,
      'rake' => :ruby,
      'mab' => :ruby,
      'cpp' => :c,
      'c' => :c,
      'h' => :c,
      'xml' => :xml,
      'htm' => :html,
      'html' => :html,
      'xhtml' => :xhtml,
      'raydebug' => :debug,
      'rhtml' => :rhtml,
      'ss' => :scheme,
      'sch' => :scheme,
      'yaml' => :yaml,
      'yml' => :yaml,
    }
    # Interpreter names recognized in the first line of a file.
    TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/
    # Base name of the file => type.
    TypeFromName = {
      'Rakefile' => :ruby,
      'Rantfile' => :ruby,
    }
  end
end
if $0 == __FILE__
$VERBOSE = true
eval DATA.read, nil, $0, __LINE__+4
end
__END__
require 'test/unit'
class TC_FileType < Test::Unit::TestCase
def test_fetch
assert_raise FileType::UnknownFileType do
FileType.fetch ''
end
assert_throws :not_found do
FileType.fetch '.' do
throw :not_found
end
end
assert_equal :default, FileType.fetch('c', :default)
stderr, fake_stderr = $stderr, Object.new
$err = ''
def fake_stderr.write x
$err << x
end
$stderr = fake_stderr
FileType.fetch('c', :default) { }
assert_equal "block supersedes default value argument\n", $err
$stderr = stderr
end
def test_ruby
assert_equal :ruby, FileType['test.rb']
assert_equal :ruby, FileType['C:\\Program Files\\x\\y\\c\\test.rbw']
assert_equal :ruby, FileType['/usr/bin/something/Rakefile']
assert_equal :ruby, FileType['~/myapp/gem/Rantfile']
assert_equal :ruby, FileType['./lib/tasks\repository.rake']
assert_not_equal :ruby, FileType['test_rb']
assert_not_equal :ruby, FileType['Makefile']
assert_not_equal :ruby, FileType['set.rb/set']
assert_not_equal :ruby, FileType['~/projects/blabla/rb']
end
def test_c
assert_equal :c, FileType['test.c']
assert_equal :c, FileType['C:\\Program Files\\x\\y\\c\\test.h']
assert_not_equal :c, FileType['test_c']
assert_not_equal :c, FileType['Makefile']
assert_not_equal :c, FileType['set.h/set']
assert_not_equal :c, FileType['~/projects/blabla/c']
end
def test_html
assert_equal :html, FileType['test.htm']
assert_equal :xhtml, FileType['test.xhtml']
assert_equal :xhtml, FileType['test.html.xhtml']
assert_equal :rhtml, FileType['_form.rhtml']
end
def test_yaml
assert_equal :yaml, FileType['test.yml']
assert_equal :yaml, FileType['test.yaml']
assert_equal :yaml, FileType['my.html.yaml']
assert_not_equal :yaml, FileType['YAML']
end
def test_shebang
dir = './test'
if File.directory? dir
Dir.chdir dir do
assert_equal :c, FileType['test.c']
end
end
end
end

View File

@ -0,0 +1,123 @@
# =GZip Simple
#
# A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.)
#
# Author: murphy (mail to murphy cYcnus de)
#
# Version: 0.2 (2005.may.28)
#
# ==Documentation
#
# See +GZip+ module and the +String+ extensions.
#
module GZip
  require 'zlib'

  # The default zipping level. 7 zips good and fast.
  DEFAULT_GZIP_LEVEL = 7

  class << self
    # Unzips the given string +s+.
    #
    # Example:
    #   require 'gzip_simple'
    #   print GZip.gunzip(File.read('adresses.gz'))
    def gunzip(s)
      Zlib::Inflate.inflate(s)
    end

    # Zips the given string +s+.
    #
    # Example:
    #   require 'gzip_simple'
    #   File.open('adresses.gz', 'w') do |file|
    #     file.write GZip.gzip('Mum: 0123 456 789', 9)
    #   end
    #
    # If you provide a +level+, you can control how strong
    # the string is compressed:
    # - 0: no compression, only convert to gzip format
    # - 1: compress fast
    # - 7: compress more, but still fast (default)
    # - 8: compress more, slower
    # - 9: compress best, very slow
    def gzip(s, level = DEFAULT_GZIP_LEVEL)
      Zlib::Deflate.deflate(s, level)
    end
  end
end
# String extensions to use the GZip module.
#
# The methods gzip and gunzip provide an even more simple
# interface to the ZLib:
#
# # create a big string
# x = 'a' * 1000
#
# # zip it
# x_gz = x.gzip
#
# # test the result
# puts 'Zipped %d bytes to %d bytes.' % [x.size, x_gz.size]
# #-> Zipped 1000 bytes to 19 bytes.
#
# # unzipping works
# p x_gz.gunzip == x #-> true
class String
  # Unzipped copy of this string; delegates to GZip.gunzip.
  def gunzip
    GZip.gunzip(self)
  end

  # In-place variant of gunzip: the receiver's content is replaced by its
  # unzipped value.
  def gunzip!
    replace(gunzip)
  end

  # Zipped copy of this string; delegates to GZip.gzip.
  # +level+ is the compression level, see GZip.gzip.
  def gzip(level = GZip::DEFAULT_GZIP_LEVEL)
    GZip.gzip(self, level)
  end

  # In-place variant of gzip: the receiver's content is replaced by its
  # zipped value. Takes the same arguments as gzip.
  def gzip!(*args)
    replace(gzip(*args))
  end
end
if $0 == __FILE__
eval DATA.read, nil, $0, __LINE__+4
end
__END__
#CODE
# Testing / Benchmark
x = 'a' * 1000
x_gz = x.gzip
puts 'Zipped %d bytes to %d bytes.' % [x.size, x_gz.size] #-> Zipped 1000 bytes to 19 bytes.
p x_gz.gunzip == x #-> true
require 'benchmark'
INFO = 'packed to %0.3f%%' # :nodoc:
x = Array.new(100000) { rand(255).chr + 'aaaaaaaaa' + rand(255).chr }.join
Benchmark.bm(10) do |bm|
for level in 0..9
bm.report "zip #{level}" do
$x = x.gzip level
end
puts INFO % [100.0 * $x.size / x.size]
end
bm.report 'zip' do
$x = x.gzip
end
puts INFO % [100.0 * $x.size / x.size]
bm.report 'unzip' do
$x.gunzip
end
end

View File

@ -0,0 +1,329 @@
module CodeRay
# = PluginHost
#
# $Id: plugin.rb 220 2007-01-01 02:58:58Z murphy $
#
# A simple subclass plugin system.
#
# Example:
# class Generators < PluginHost
# plugin_path 'app/generators'
# end
#
# class Generator
# extend Plugin
# PLUGIN_HOST = Generators
# end
#
# class FancyGenerator < Generator
# register_for :fancy
# end
#
# Generators[:fancy] #-> FancyGenerator
# # or
# require_plugin 'Generators/fancy'
module PluginHost
# Raised if Encoders::[] fails because:
# * a file could not be found
# * the requested Encoder is not registered
PluginNotFound = Class.new Exception
HostNotFound = Class.new Exception
PLUGIN_HOSTS = []
PLUGIN_HOSTS_BY_ID = {} # dummy hash
# Loads all plugins using list and load.
def load_all
for plugin in list
load plugin
end
end
# Returns the Plugin for +id+.
#
# Example:
# yaml_plugin = MyPluginHost[:yaml]
def [] id, *args, &blk
plugin = validate_id(id)
begin
plugin = plugin_hash.[] plugin, *args, &blk
end while plugin.is_a? Symbol
plugin
end
# Alias for +[]+.
alias load []
def require_helper plugin_id, helper_name
path = path_to File.join(plugin_id, helper_name)
require path
end
class << self
# Adds the module/class to the PLUGIN_HOSTS list.
def extended mod
PLUGIN_HOSTS << mod
end
# Warns you that you should not #include this module.
def included mod
warn "#{name} should not be included. Use extend."
end
# Find the PluginHost for host_id.
def host_by_id host_id
unless PLUGIN_HOSTS_BY_ID.default_proc
ph = Hash.new do |h, a_host_id|
for host in PLUGIN_HOSTS
h[host.host_id] = host
end
h.fetch a_host_id, nil
end
PLUGIN_HOSTS_BY_ID.replace ph
end
PLUGIN_HOSTS_BY_ID[host_id]
end
end
# The path where the plugins can be found.
def plugin_path *args
unless args.empty?
@plugin_path = File.expand_path File.join(*args)
load_map
end
@plugin_path
end
# The host's ID.
#
# If PLUGIN_HOST_ID is not set, it is simply the class name.
def host_id
if self.const_defined? :PLUGIN_HOST_ID
self::PLUGIN_HOST_ID
else
name
end
end
# Map a plugin_id to another.
#
# Usage: Put this in a file plugin_path/_map.rb.
#
# class MyColorHost < PluginHost
# map :navy => :dark_blue,
# :maroon => :brown,
# :luna => :moon
# end
def map hash
for from, to in hash
from = validate_id from
to = validate_id to
plugin_hash[from] = to unless plugin_hash.has_key? from
end
end
# Define the default plugin to use when no plugin is found
# for a given id.
#
# See also map.
#
# class MyColorHost < PluginHost
# map :navy => :dark_blue
# default :gray
# end
def default id
id = validate_id id
plugin_hash[nil] = id
end
# Every plugin must register itself for one or more
# +ids+ by calling register_for, which calls this method.
#
# See Plugin#register_for.
def register plugin, *ids
  ids.each do |id|
    # Unlike validate_id, registration accepts Symbols only.
    raise ArgumentError, "id must be a Symbol, but it was a #{id.class}" unless id.is_a? Symbol
    plugin_hash[validate_id(id)] = plugin
  end
end
# A Hash of plugin_id => Plugin pairs.
def plugin_hash
@plugin_hash ||= create_plugin_hash
end
# Returns an array of all .rb files in the plugin path.
#
# The extension .rb is not included.
def list
  candidates = Dir[path_to('*')]
  # Keep plain word-character names only; files starting with an underscore
  # (such as _map.rb) are skipped.
  plugin_files = candidates.select { |path| File.basename(path) =~ /^(?!_)\w+\.rb$/ }
  plugin_files.map { |path| File.basename(path, '.rb') }
end
# Makes a map of all loaded plugins.
def inspect
map = plugin_hash.dup
map.each do |id, plugin|
map[id] = plugin.to_s[/(?>[\w_]+)$/]
end
"#{name}[#{host_id}]#{map.inspect}"
end
protected
# Creates a new plugin hash and stores it in @plugin_hash.
def create_plugin_hash
  @plugin_hash =
    # The default block runs on a lookup of an id that has no entry yet:
    # it requires the plugin file for that id.
    Hash.new do |h, plugin_id|
      id = validate_id(plugin_id)
      path = path_to id
      begin
        require path
      rescue LoadError => boom
        # The file could not be loaded: fall back to the default plugin
        # (stored under the nil key) if one was set, otherwise give up.
        if h.has_key? nil  # default plugin
          h[id] = h[nil]
        else
          raise PluginNotFound, 'Could not load plugin %p: %s' % [id, boom]
        end
      else
        # Plugin should have registered by now
        unless h.has_key? id
          raise PluginNotFound,
            "No #{self.name} plugin for #{id.inspect} found in #{path}."
        end
      end
      h[id]
    end
end
# Loads the map file (see map).
#
# This is done automatically when plugin_path is called.
def load_map
mapfile = path_to '_map'
if File.exist? mapfile
require mapfile
elsif $DEBUG
warn 'no _map.rb found for %s' % name
end
end
# Returns the Plugin for +id+.
# Use it like Hash#fetch.
#
# Example:
# yaml_plugin = MyPluginHost[:yaml, :default]
def fetch id, *args, &blk
plugin_hash.fetch validate_id(id), *args, &blk
end
# Returns the expected path to the plugin file for the given id.
def path_to plugin_id
File.join plugin_path, "#{plugin_id}.rb"
end
# Converts +id+ to a Symbol if it is a String,
# or returns +id+ if it already is a Symbol.
#
# Raises +ArgumentError+ for all other objects, or if the
# given String includes non-alphanumeric characters (\W).
def validate_id id
  case id
  when Symbol, nil
    id
  when String
    # The whole string has to consist of word characters.
    raise ArgumentError, "Invalid id: '#{id}' given." unless id[/\w+/] == id
    id.to_sym
  else
    raise ArgumentError, "String or Symbol expected, but #{id.class} given."
  end
end
end
# = Plugin
#
# Plugin classes have to extend this module.
#
# IMPORTANT: use extend for this module.
#
# Example: see PluginHost.
module Plugin
  # Warns you that you should not #include this module.
  #
  # This has to be a singleton method of Plugin: Ruby calls the +included+
  # hook on the module that is being included.
  def self.included mod
    warn "#{name} should not be included. Use extend."
  end
  # Register this class for the given langs.
  # Example:
  #   class MyPlugin < PluginHost::BaseClass
  #     register_for :my_id
  #     ...
  #   end
  #
  # See PluginHost.register.
  def register_for *ids
    plugin_host.register self, *ids
  end
  # The host for this Plugin class.
  #
  # If +host+ is given, it is stored in the PLUGIN_HOST constant of the
  # class first. Raises ArgumentError if +host+ is not a PluginHost.
  def plugin_host host = nil
    if host and not host.is_a? PluginHost
      raise ArgumentError,
        "PluginHost expected, but #{host.class} given."
    end
    self.const_set :PLUGIN_HOST, host if host
    self::PLUGIN_HOST
  end
  # Require some helper files.
  #
  # Example:
  #
  #   class MyPlugin < PluginHost::BaseClass
  #     register_for :my_id
  #     helper :my_helper
  #
  # The above example loads the file myplugin/my_helper.rb relative to the
  # file in which MyPlugin was defined.
  def helper *helpers
    helpers.each do |helper|
      self::PLUGIN_HOST.require_helper plugin_id, helper.to_s
    end
  end
  # Returns the plugin id used by the engine: the last part of the class
  # name, downcased.
  def plugin_id
    name[/[\w_]+$/].downcase
  end
end
# Convenience method for plugin loading.
# The syntax used is:
#
# CodeRay.require_plugin '<Host ID>/<Plugin ID>'
#
# Returns the loaded plugin.
def require_plugin path
  # Everything after the first slash belongs to the plugin id.
  host_id, plugin_id = path.split('/', 2)
  host = PluginHost.host_by_id(host_id)
  if host
    host.load plugin_id
  else
    raise PluginHost::HostNotFound, "No host for #{host_id.inspect} found."
  end
end
end

View File

@ -0,0 +1,123 @@
module CodeRay
  # = WordList
  #
  # <b>A Hash subclass designed for mapping word lists to token types.</b>
  #
  # Copyright (c) 2006 by murphy (Kornelius Kalnbach) <murphy rubychan de>
  #
  # License:: LGPL / ask the author
  # Version:: 1.1 (2006-Oct-19)
  #
  # A WordList is a Hash with some additional features.
  # It is intended to be used for keyword recognition.
  #
  # WordList is highly optimized to be used in Scanners,
  # typically to decide whether a given ident is a special token.
  #
  # For case insensitive words use CaseIgnoringWordList.
  #
  # Example:
  #
  #  # define word arrays
  #  RESERVED_WORDS = %w[
  #    asm break case continue default do else
  #    ...
  #  ]
  #
  #  PREDEFINED_TYPES = %w[
  #    int long short char void
  #    ...
  #  ]
  #
  #  PREDEFINED_CONSTANTS = %w[
  #    EOF NULL ...
  #  ]
  #
  #  # make a WordList
  #  IDENT_KIND = WordList.new(:ident).
  #    add(RESERVED_WORDS, :reserved).
  #    add(PREDEFINED_TYPES, :pre_type).
  #    add(PREDEFINED_CONSTANTS, :pre_constant)
  #
  #  ...
  #
  #  def scan_tokens tokens, options
  #    ...
  #
  #    elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
  #      # use it
  #      kind = IDENT_KIND[match]
  #      ...
  class WordList < Hash
    # Creates a new WordList with +default+ as default value.
    #
    # You can activate +caching+ to store the results for every [] request.
    #
    # With caching, methods like +include?+ or +delete+ may no longer behave
    # as you expect. Therefore, it is recommended to use the [] method only.
    #
    # If a block is given, it becomes the Hash's default block and +default+
    # is not used; combining a block with +caching+ raises ArgumentError.
    def initialize default = false, caching = false, &block
      if block
        raise ArgumentError, 'Can\'t combine block with caching.' if caching
        super(&block)
      else
        if caching
          super() do |h, k|
            h[k] = h.fetch k, default
          end
        else
          super default
        end
      end
    end
    # Add words to the list and associate them with +kind+.
    #
    # Returns +self+, so you can concat add calls.
    def add words, kind = true
      words.each do |word|
        self[word] = kind
      end
      self
    end
  end
  # A CaseIgnoringWordList is like a WordList, only that
  # keys are compared case-insensitively.
  #
  # Ignoring the text case is realized by sending the +downcase+ message to
  # all keys.
  #
  # Caching usually makes a CaseIgnoringWordList faster, but it has to be
  # activated explicitly.
  class CaseIgnoringWordList < WordList
    # Lookup used by lists created without caching: the key is downcased
    # before the Hash is consulted.
    module Uncached  # :nodoc:
      def [] key
        super(key.downcase)
      end
    end
    # Creates a new case-insensitive WordList with +default+ as default value.
    #
    # You can activate caching to store the results for every [] request.
    def initialize default = false, caching = false
      if caching
        super(default, false) do |h, k|
          h[k] = h.fetch k.downcase, default
        end
      else
        # The superclass initializer has to run in this branch as well,
        # otherwise +default+ is never installed and unknown words yield nil.
        super(default, false)
        extend Uncached
      end
    end
    # Add +words+ to the list and associate them with +kind+.
    def add words, kind = true
      words.each do |word|
        self[word.downcase] = kind
      end
      self
    end
  end
end

View File

@ -0,0 +1,253 @@
module CodeRay
require 'coderay/helpers/plugin'
# = Scanners
#
# $Id: scanner.rb 222 2007-01-01 16:26:17Z murphy $
#
# This module holds the Scanner class and its subclasses.
# For example, the Ruby scanner is named CodeRay::Scanners::Ruby
# and can be found in coderay/scanners/ruby.
#
# Scanner also provides methods and constants for the register
# mechanism and the [] method that returns the Scanner class
# belonging to the given lang.
#
# See PluginHost.
module Scanners
extend PluginHost
plugin_path File.dirname(__FILE__), 'scanners'
require 'strscan'
# = Scanner
#
# The base class for all Scanners.
#
# It is a subclass of Ruby's great +StringScanner+, which
# makes it easy to access the scanning methods inside.
#
# It is also +Enumerable+, so you can use it like an Array of
# Tokens:
#
# require 'coderay'
#
# c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
#
# for text, kind in c_scanner
# puts text if kind == :operator
# end
#
# # prints: (*==)++;
#
# OK, this is a very simple example :)
# You can also use +map+, +any?+, +find+ and even +sort_by+,
# if you want.
class Scanner < StringScanner
extend Plugin
plugin_host Scanners
# Raised if a Scanner fails while scanning
ScanError = Class.new(Exception)
require 'coderay/helpers/word_list'
# The default options for all scanner classes.
#
# Define @default_options for subclasses.
DEFAULT_OPTIONS = { :stream => false }
class << self
# Returns if the Scanner can be used in streaming mode.
def streamable?
  # self is a scanner class here, so the question is whether the class
  # includes Streamable. is_a? would test the class object itself and is
  # false for every class that merely includes the module.
  include? Streamable
end
def normify code
code = code.to_s.to_unix
end
def file_extension extension = nil
if extension
@file_extension = extension.to_s
else
@file_extension ||= plugin_id.to_s
end
end
end
=begin
## Excluded for speed reasons; protected seems to make methods slow.
# Save the StringScanner methods from being called.
# This would not be useful for highlighting.
strscan_public_methods =
StringScanner.instance_methods -
StringScanner.ancestors[1].instance_methods
protected(*strscan_public_methods)
=end
# Create a new Scanner.
#
# * +code+ is the input String and is handled by the superclass
# StringScanner.
# * +options+ is a Hash with Symbols as keys.
# It is merged with the default options of the class (you can
# overwrite default options here.)
# * +block+ is the callback for streamed highlighting.
#
# If you set :stream to +true+ in the options, the Scanner uses a
# TokenStream with the +block+ as callback to handle the tokens.
#
# Else, a Tokens object is used.
def initialize code='', options = {}, &block
@options = self.class::DEFAULT_OPTIONS.merge options
raise "I am only the basic Scanner class. I can't scan "\
"anything. :( Use my subclasses." if self.class == Scanner
super Scanner.normify(code)
@tokens = options[:tokens]
if @options[:stream]
warn "warning in CodeRay::Scanner.new: :stream is set, "\
"but no block was given" unless block_given?
raise NotStreamableError, self unless kind_of? Streamable
@tokens ||= TokenStream.new(&block)
else
warn "warning in CodeRay::Scanner.new: Block given, "\
"but :stream is #{@options[:stream]}" if block_given?
@tokens ||= Tokens.new
end
setup
end
def reset
super
reset_instance
end
def string= code
code = Scanner.normify(code)
super code
reset_instance
end
# More mnemonic accessor name for the input string.
alias code string
alias code= string=
# Scans the code and returns all tokens in a Tokens object.
def tokenize new_string=nil, options = {}
options = @options.merge(options)
self.string = new_string if new_string
@cached_tokens =
if @options[:stream] # :stream must have been set already
reset unless new_string
scan_tokens @tokens, options
@tokens
else
scan_tokens @tokens, options
end
end
def tokens
@cached_tokens ||= tokenize
end
# Whether the scanner is in streaming mode.
def streaming?
!!@options[:stream]
end
# Traverses the tokens.
def each &block
raise ArgumentError,
'Cannot traverse TokenStream.' if @options[:stream]
tokens.each(&block)
end
include Enumerable
# The current line position of the scanner.
#
# Beware, this is implemented inefficiently. It should be used
# for debugging only.
def line
  # Count only the newlines in front of the current position. The character
  # at +pos+ itself has not been consumed yet, hence the exclusive range.
  string[0...pos].count("\n") + 1
end
protected
# Can be implemented by subclasses to do some initialization
# that has to be done once per instance.
#
# Use reset for initialization that has to be done once per
# scan.
def setup
end
# This is the central method, and commonly the only one a
# subclass implements.
#
# Subclasses must implement this method; it must return +tokens+
# and must only use Tokens#<< for storing scanned tokens!
def scan_tokens tokens, options
raise NotImplementedError,
"#{self.class}#scan_tokens not implemented."
end
def reset_instance
@tokens.clear unless @options[:keep_tokens]
@cached_tokens = nil
end
# Scanner error with additional status information
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
raise ScanError, <<-EOE % [
***ERROR in %s: %s (after %d tokens)
tokens:
%s
current line: %d pos = %d
matched: %p state: %p
bol? = %p, eos? = %p
surrounding code:
%p ~~ %p
***ERROR***
EOE
File.basename(caller[0]),
msg,
tokens.size,
tokens.last(10).map { |t| t.inspect }.join("\n"),
line, pos,
matched, state, bol?, eos?,
string[pos-ambit,ambit],
string[pos,ambit],
]
end
end
end
end
class String
  # Converts DOS ("\r\n") and old Mac ("\r") line ends to Unix ("\n").
  #
  # Returns the receiver itself (not a copy) if it contains no "\r".
  def to_unix
    return self unless include?("\r")
    gsub(/\r\n?/, "\n")
  end
end

View File

@ -0,0 +1,15 @@
module CodeRay
  module Scanners
    # Aliases: a request for an id on the left is answered with the scanner
    # registered for the id on the right.
    map :cpp => :c,
      :plain => :plaintext,
      :pascal => :delphi,
      :irb => :ruby,
      :xml => :html,
      :xhtml => :nitro_xhtml,
      :nitro => :nitro_xhtml
    # Id to fall back on when no scanner can be loaded for a requested id.
    # :plain is itself an alias (see above) for :plaintext.
    default :plain
  end
end

View File

@ -0,0 +1,165 @@
module CodeRay
module Scanners
class C < Scanner
register_for :c
include Streamable
RESERVED_WORDS = [
'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
'for', 'goto', 'if', 'return', 'switch', 'while',
'struct', 'union', 'enum', 'typedef',
'static', 'register', 'auto', 'extern',
'sizeof',
'volatile', 'const', # C89
'inline', 'restrict', # C99
]
PREDEFINED_TYPES = [
'int', 'long', 'short', 'char', 'void',
'signed', 'unsigned', 'float', 'double',
'bool', 'complex', # C99
]
PREDEFINED_CONSTANTS = [
'EOF', 'NULL',
'true', 'false', # C99
]
IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_TYPES, :pre_type).
add(PREDEFINED_CONSTANTS, :pre_constant)
ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
def scan_tokens tokens, options
state = :initial
until eos?
kind = nil
match = nil
case state
when :initial
if scan(/ \s+ | \\\n /x)
kind = :space
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
kind = :comment
elsif match = scan(/ \# \s* if \s* 0 /x)
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
kind = :comment
elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
kind = :operator
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
kind = IDENT_KIND[match]
if kind == :ident and check(/:(?!:)/)
match << scan(/:/)
kind = :label
end
elsif match = scan(/L?"/)
tokens << [:open, :string]
if match[0] == ?L
tokens << ['L', :modifier]
match = '"'
end
state = :string
kind = :delimiter
elsif scan(/#\s*(\w*)/)
kind = :preprocessor # FIXME multiline preprocs
state = :include_expected if self[1] == 'include'
elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
kind = :char
elsif scan(/0[xX][0-9A-Fa-f]+/)
kind = :hex
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
kind = :oct
elsif scan(/(?:\d+)(?![.eEfF])/)
kind = :integer
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
kind = :float
else
getch
kind = :error
end
when :string
if scan(/[^\\\n"]+/)
kind = :content
elsif scan(/"/)
tokens << ['"', :delimiter]
tokens << [:close, :string]
state = :initial
next
elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
kind = :char
elsif scan(/ \\ | $ /x)
tokens << [:close, :string]
kind = :error
state = :initial
else
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
end
when :include_expected
if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
kind = :include
state = :initial
elsif match = scan(/\s+/)
kind = :space
state = :initial if match.index ?\n
else
getch
kind = :error
end
else
raise_inspect 'Unknown state', tokens
end
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
tokens << [match, kind]
end
if state == :string
tokens << [:close, :string]
end
tokens
end
end
end
end

View File

@ -0,0 +1,60 @@
module CodeRay
  module Scanners

    # = Debug Scanner
    #
    # Reads a textual token dump and turns it back into tokens:
    #
    #   kind(text)    a single token of the given kind; inside the
    #                 parentheses a backslash escapes the next character
    #   kind< ... >   opens a token group of the given kind / closes the
    #                 most recently opened group
    #
    # Whitespace becomes :space tokens; everything else is an :error token.
    class Debug < Scanner

      include Streamable
      register_for :debug

    protected

      # Scans the whole input, appends the tokens to +tokens+ and returns it.
      # +options+ is part of the scanner interface and is not used here.
      def scan_tokens tokens, options

        # Stack of the kinds of all groups that are currently open.
        opened_tokens = []

        until eos?

          kind = nil
          match = nil

          if scan(/\s+/)
            tokens << [matched, :space]
            next

          elsif scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \) /x)
            kind = self[1].to_sym
            # Remove the escaping backslashes from the token text.
            match = self[2].gsub(/\\(.)/, '\1')

          elsif scan(/ (\w+) < /x)
            kind = self[1].to_sym
            opened_tokens << kind
            match = :open

          elsif scan(/ > /x)
            if opened_tokens.empty?
              # A closing bracket without an open group: popping the empty
              # stack would yield a [:close, nil] token, so flag the
              # character as an error instead.
              kind = :error
            else
              kind = opened_tokens.pop
              match = :close
            end

          else
            kind = :error
            getch

          end

          match ||= matched
          if $DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens
          end
          raise_inspect 'Empty token', tokens unless match

          tokens << [match, kind]

        end

        # Close the groups that are still open at the end of the input, so
        # that the resulting token stream is always balanced.
        until opened_tokens.empty?
          tokens << [:close, opened_tokens.pop]
        end

        tokens
      end

    end

  end
end

View File

@ -0,0 +1,149 @@
module CodeRay
  module Scanners

    # Scanner for Delphi (Object Pascal) source code.
    #
    # It knows two states: :initial (normal code) and :string (inside a
    # single-quoted string literal that is longer than one character).
    class Delphi < Scanner

      register_for :delphi

      RESERVED_WORDS = [
        'and', 'array', 'as', 'at', 'asm', 'begin', 'case', 'class',
        'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
        'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
        'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
        'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
        'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
        'procedure', 'program', 'property', 'raise', 'record', 'repeat',
        'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
        'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
        'xor', 'on'
      ]

      DIRECTIVES = [
        'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
        'contains', 'deprecated', 'dispid', 'dynamic', 'export',
        'external', 'far', 'forward', 'implements', 'local',
        'near', 'nodefault', 'on', 'overload', 'override',
        'package', 'pascal', 'platform', 'private', 'protected', 'public',
        'published', 'read', 'readonly', 'register', 'reintroduce',
        'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
        'virtual', 'write', 'writeonly'
      ]

      # Maps a word to its token kind; words not listed are :ident.
      IDENT_KIND = CaseIgnoringWordList.new(:ident, caching=true).
        add(RESERVED_WORDS, :reserved).
        add(DIRECTIVES, :directive)

      # Tokens after which the next word is always a plain identifier,
      # even if it is spelled like a reserved word or a directive.
      NAME_FOLLOWS = CaseIgnoringWordList.new(false, caching=true).
        add(%w(procedure function .))

    private

      # Scans the whole input, appends the tokens to +tokens+ and returns it.
      # +options+ is part of the scanner interface and is not used here.
      def scan_tokens tokens, options

        state = :initial
        # Text of the previous token that went through the end of the loop
        # body; consulted for NAME_FOLLOWS and for the final "end.".
        last_token = ''

        until eos?

          kind = nil
          match = nil

          if state == :initial

            if scan(/ \s+ /x)
              tokens << [matched, :space]
              next

            # Compiler directives: {$...} or (*$...*)
            elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
              tokens << [matched, :preprocessor]
              next

            # Comments: // ..., { ... } or (* ... *)
            elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
              tokens << [matched, :comment]
              next

            elsif match = scan(/ <[>=]? | >=? | :=? | [-+=*\/;,@\^|\(\)\[\]] | \.\. /x)
              kind = :operator

            elsif match = scan(/\./)
              kind = :operator
              if last_token == 'end'
                # The dot of "end." is pushed without updating last_token,
                # so a word following it is not forced to be an identifier.
                tokens << [match, kind]
                next
              end

            elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
              kind = NAME_FOLLOWS[last_token] ? :ident : IDENT_KIND[match]

            # Character literal: exactly one character (or '') in quotes.
            elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
              tokens << [:open, :char]
              tokens << ["'", :delimiter]
              tokens << [self[1], :content]
              tokens << ["'", :delimiter]
              tokens << [:close, :char]
              next

            elsif match = scan(/ ' /x)
              tokens << [:open, :string]
              state = :string
              kind = :delimiter

            # Character codes: #13 or #$0D
            elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
              kind = :char

            elsif scan(/ \$ [0-9A-Fa-f]+ /x)
              kind = :hex

            # An integer must not be followed by an exponent or a fraction;
            # ".." (range operator) after the digits is allowed.
            elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
              kind = :integer

            elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
              kind = :float

            else
              kind = :error
              getch

            end

          elsif state == :string
            if scan(/[^\n']+/)
              kind = :content
            elsif scan(/''/)
              # A doubled quote is an escaped quote character.
              kind = :char
            elsif scan(/'/)
              tokens << ["'", :delimiter]
              tokens << [:close, :string]
              state = :initial
              next
            elsif scan(/\n/)
              # Strings end at the end of the line; flag the line break.
              tokens << [:close, :string]
              kind = :error
              state = :initial
            else
              # raise_inspect (not raise) is needed here: Kernel#raise does
              # not accept a String together with a second argument.
              raise_inspect "else case \' reached; %p not handled." % peek(1), tokens, state
            end

          else
            raise_inspect 'else-case reached', tokens, state

          end

          match ||= matched
          if $DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens, state
          end
          raise_inspect 'Empty token', tokens unless match

          last_token = match
          tokens << [match, kind]

        end

        tokens
      end

    end

  end
end

View File

@ -0,0 +1,177 @@
module CodeRay
  module Scanners

    # HTML Scanner
    #
    # $Id$
    #
    # A state machine with the states :initial (text between tags),
    # :attribute, :attribute_equal, :attribute_value (inside a tag) and
    # :attribute_value_string (inside a quoted attribute value).
    class HTML < Scanner

      include Streamable
      register_for :html

      ATTR_NAME = /[\w.:-]+/
      ATTR_VALUE_UNQUOTED = ATTR_NAME
      TAG_END = /\/?>/
      HEX = /[0-9a-fA-F]/
      ENTITY = /
        &
        (?:
          \w+
        |
          \#
          (?:
            \d+
          |
            x#{HEX}+
          )
        )
        ;
      /ox

      # Pattern for plain content of a quoted attribute value, selected by
      # the quote character that opened the value.
      PLAIN_STRING_CONTENT = {
        "'" => /[^&'>\n]+/,
        '"' => /[^&">\n]+/,
      }

      # Resets the scanner and puts the state machine back into :initial.
      def reset
        super
        @state = :initial
      end

    private

      # Initializes the state that is kept between scan_tokens calls.
      def setup
        @state = :initial
        @plain_string_content = nil
      end

      # Scans the whole input, appends the tokens to +tokens+ and returns it.
      #
      # Scanning starts in the state stored in @state. If
      # options[:keep_state] is set, the state reached at the end of the
      # input is stored again, so that a following call continues there.
      def scan_tokens tokens, options

        state = @state
        plain_string_content = @plain_string_content

        until eos?

          kind = nil
          match = nil

          # Whitespace is handled the same way in every state.
          if scan(/\s+/m)
            kind = :space

          else

            case state

            when :initial
              if scan(/<!--.*?-->/m)
                kind = :comment
              elsif scan(/<!DOCTYPE.*?>/m)
                kind = :preprocessor
              elsif scan(/<\?xml.*?\?>/m)
                kind = :preprocessor
              elsif scan(/<\?.*?\?>|<%.*?%>/m)
                kind = :comment
              elsif scan(/<\/[-\w_.:]*>/m)
                kind = :tag
              elsif match = scan(/<[-\w_.:]+>?/m)
                kind = :tag
                # Without the closing ">" attributes may follow.
                state = :attribute unless match[-1] == ?>
              elsif scan(/[^<>&]+/)
                kind = :plain
              elsif scan(/#{ENTITY}/ox)
                kind = :entity
              elsif scan(/[<>&]/)
                kind = :error
              else
                raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
              end

            when :attribute
              # The "o" flag builds the regexp only once, like the other
              # interpolated patterns in this method.
              if scan(/#{TAG_END}/o)
                kind = :tag
                state = :initial
              elsif scan(/#{ATTR_NAME}/o)
                kind = :attribute_name
                state = :attribute_equal
              else
                kind = :error
                getch
              end

            when :attribute_equal
              if scan(/=/)
                kind = :operator
                state = :attribute_value
              elsif scan(/#{ATTR_NAME}/o)
                # The previous attribute had no value; this is the next one.
                kind = :attribute_name
              elsif scan(/#{TAG_END}/o)
                kind = :tag
                state = :initial
              elsif scan(/./)
                kind = :error
                state = :attribute
              end

            when :attribute_value
              if scan(/#{ATTR_VALUE_UNQUOTED}/o)
                kind = :attribute_value
                state = :attribute
              elsif match = scan(/["']/)
                tokens << [:open, :string]
                state = :attribute_value_string
                plain_string_content = PLAIN_STRING_CONTENT[match]
                kind = :delimiter
              elsif scan(/#{TAG_END}/o)
                kind = :tag
                state = :initial
              else
                kind = :error
                getch
              end

            when :attribute_value_string
              if scan(plain_string_content)
                kind = :content
              elsif scan(/['"]/)
                tokens << [matched, :delimiter]
                tokens << [:close, :string]
                state = :attribute
                next
              elsif scan(/#{ENTITY}/ox)
                kind = :entity
              elsif scan(/&/)
                # An ampersand that does not start an entity.
                kind = :content
              elsif scan(/[\n>]/)
                # The value was not closed before the end of the tag.
                tokens << [:close, :string]
                kind = :error
                state = :initial
              end

            else
              raise_inspect 'Unknown state: %p' % [state], tokens

            end

          end

          match ||= matched
          if $DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens, state
          end
          raise_inspect 'Empty token', tokens unless match

          tokens << [match, kind]
        end

        if options[:keep_state]
          @state = state
          @plain_string_content = plain_string_content
        end

        tokens
      end

    end

  end
end

View File

@ -0,0 +1,133 @@
module CodeRay
  module Scanners

    load :html
    load :ruby

    # Nitro XHTML Scanner
    #
    # $Id$
    #
    # Splits the input into HTML parts, which are passed to the HTML
    # scanner, and embedded Ruby parts, which are passed to the Ruby
    # scanner. All scanners write into the same token list.
    class NitroXHTML < Scanner

      include Streamable
      register_for :nitro_xhtml

      # A block of Ruby code: <?r ... ?>, <ruby> ... </ruby> or <% ... %>.
      # The closing delimiter is optional (unterminated block).
      NITRO_RUBY_BLOCK = /
        <\?r
        (?>
          [^\?]*
          (?> \?(?!>) [^\?]* )*
        )
        (?: \?> )?
      |
        <ruby>
        (?>
          [^<]*
          (?> <(?!\/ruby>) [^<]* )*
        )
        (?: <\/ruby> )?
      |
        <%
        (?>
          [^%]*
          (?> %(?!>) [^%]* )*
        )
        (?: %> )?
      /mx

      # Closing delimiter that belongs to each opening delimiter of
      # NITRO_RUBY_BLOCK.
      RUBY_BLOCK_END_TAG = {
        '<?r'    => '?>',
        '<ruby>' => '</ruby>',
        '<%'     => '%>',
      }

      # An inline value: "#" followed by a bracketed expression. The closing
      # bracket is optional.
      NITRO_VALUE_BLOCK = /
        \#
        (?:
          \{
          [^{}]*
          (?>
            \{ [^}]* \}
            (?> [^{}]* )
          )*
          \}?
        | \| [^|]* \|?
        | \( [^)]* \)?
        | \[ [^\]]* \]?
        | \\ [^\\]* \\?
        )
      /x

      NITRO_ENTITY = /
        % (?: \#\d+ | \w+ ) ;
      /

      # Matches where the HTML part ends and a Nitro construct begins.
      START_OF_RUBY = /
        (?=[<\#%])
        < (?: \?r | % | ruby> )
      | \# [{(|]
      | % (?: \#\d+ | \w+ ) ;
      /x

      # Maps an opening bracket to its closing bracket; every other
      # character closes itself.
      CLOSING_PAREN = Hash.new do |h, p|
        h[p] = p
      end.update( {
        '(' => ')',
        '[' => ']',
        '{' => '}',
      } )

    private

      # Creates the sub-scanners; both append to this scanner's token list.
      def setup
        @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
        @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
      end

      # Also resets the HTML scanner, which keeps its state between calls.
      def reset_instance
        super
        @html_scanner.reset
      end

      # Scans the whole input, appends the tokens to +tokens+ and returns it.
      # +options+ is part of the scanner interface and is not used here.
      def scan_tokens tokens, options

        until eos?

          # Everything up to the next Nitro construct (or the end) is HTML.
          if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty?
            @html_scanner.tokenize match

          elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
            start_tag = match[0,2]
            delimiter = CLOSING_PAREN[start_tag[1,1]]
            # The last character only closes the block if it is not part of
            # the start tag itself (as in a lone "#|").
            closed = match.size > start_tag.size && match[-1,1] == delimiter
            end_tag = closed ? delimiter : ''
            tokens << [:open, :inline]
            tokens << [start_tag, :inline_delimiter]
            code = match[start_tag.size .. -1 - end_tag.size]
            @ruby_scanner.tokenize code
            tokens << [end_tag, :inline_delimiter] unless end_tag.empty?
            tokens << [:close, :inline]

          elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
            # Use the delimiters that were actually matched; the pattern
            # accepts three different block forms.
            start_tag = match[/\A(?:<\?r|<ruby>|<%)/]
            closer = RUBY_BLOCK_END_TAG[start_tag]
            # The closer must lie completely behind the start tag ("<%>" is
            # an unterminated block, not an empty one).
            closed = match.size >= start_tag.size + closer.size &&
              match[-closer.size, closer.size] == closer
            end_tag = closed ? closer : ''
            tokens << [:open, :inline]
            tokens << [start_tag, :inline_delimiter]
            code = match[start_tag.size .. -(end_tag.size)-1]
            @ruby_scanner.tokenize code
            tokens << [end_tag, :inline_delimiter] unless end_tag.empty?
            tokens << [:close, :inline]

          elsif entity = scan(/#{NITRO_ENTITY}/o)
            tokens << [entity, :entity]

          elsif scan(/%/)
            tokens << [matched, :error]

          else
            raise_inspect 'else-case reached!', tokens

          end

        end

        tokens

      end

    end

  end
end

View File

@ -0,0 +1,18 @@
module CodeRay
  module Scanners

    # Scanner for input without any syntax: the text is not analyzed,
    # it is passed on as it is.
    class Plaintext < Scanner

      register_for :plaintext, :plain

      include Streamable

      # Appends the rest of the input to +tokens+ as one single :plain
      # token (an empty string if nothing could be read) and returns the
      # result of that append.
      def scan_tokens tokens, options
        remainder = scan_until(/\z/)
        remainder ||= ''
        tokens << [remainder, :plain]
      end

    end

  end
end

View File

@ -0,0 +1,73 @@
module CodeRay
  module Scanners

    load :html
    load :ruby

    # RHTML Scanner
    #
    # $Id$
    #
    # Splits the input into HTML parts, which are passed to the HTML
    # scanner, and ERB blocks, whose code is passed to the Ruby scanner.
    # All scanners write into the same token list.
    class RHTML < Scanner

      include Streamable
      register_for :rhtml

      # An ERB block: <% ... %>, <%= ... %> or <%- ... -%>.
      # "<%%" does not start a block; the closing delimiter is optional
      # (unterminated block).
      ERB_RUBY_BLOCK = /
        <%(?!%)[=-]?
        (?>
          [^\-%]*    # normal*
          (?>        # special
            (?: %(?!>) | -(?!%>) )
            [^\-%]*  # normal*
          )*
        )
        (?: -?%> )?
      /x

      START_OF_ERB = /
        <%(?!%)
      /x

    private

      # Creates the sub-scanners; both append to this scanner's token list.
      def setup
        @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
        @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
      end

      # Also resets the HTML scanner, which keeps its state between calls.
      def reset_instance
        super
        @html_scanner.reset
      end

      # Scans the whole input, appends the tokens to +tokens+ and returns it.
      # +options+ is part of the scanner interface and is not used here.
      def scan_tokens tokens, options

        until eos?

          # Everything up to the next ERB block (or the end) is HTML.
          if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
            @html_scanner.tokenize match

          elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
            start_tag = match[/\A<%[-=]?/]
            # Look for the closing delimiter only behind the start tag, and
            # accept only a complete "%>" or "-%>": a trailing ">", "%" or
            # "-" of an unterminated block belongs to the code.
            rest = match[start_tag.size .. -1]
            end_tag = rest[/-?%>\z/] || ''
            tokens << [:open, :inline]
            tokens << [start_tag, :inline_delimiter]
            code = rest[0, rest.size - end_tag.size]
            @ruby_scanner.tokenize code
            tokens << [end_tag, :inline_delimiter] unless end_tag.empty?
            tokens << [:close, :inline]

          else
            raise_inspect 'else-case reached!', tokens
          end

        end

        tokens

      end

    end

  end
end

View File

@ -0,0 +1,368 @@
module CodeRay
  module Scanners

    # This scanner is really complex, since Ruby _is_ a complex language!
    #
    # It tries to highlight 100% of all common code,
    # and 90% of strange codes.
    #
    # It is optimized for HTML highlighting, and is not very useful for
    # parsing or pretty printing.
    #
    # For now, I think it's better than the scanners in VIM or Syntax, or
    # any highlighter I was able to find, except Caleb's RubyLexer.
    #
    # I hope it's also better than the rdoc/irb lexer.
    class Ruby < Scanner

      include Streamable

      register_for :ruby
      file_extension 'rb'

      helper :patterns

    private

      # Scans the whole input, appends the tokens to +tokens+ and returns it.
      # +options+ is part of the scanner interface and is not used here.
      #
      # The scanner state is either a Patterns::StringState (inside a
      # string-like literal) or one of the symbols :initial, :def_expected,
      # :undef_expected, :undef_comma_expected and :module_expected.
      def scan_tokens tokens, options
        # true/:set if the previous token was "." or "::"
        last_token_dot = false
        # true/:set if a value may start here (/regexp/, heredoc, %-literal, ?c)
        value_expected = true
        # StringStates of the heredocs opened on the current line, or nil
        heredocs = nil
        # state to return to after a single token (used for "#$var"/"#@var")
        last_state = nil
        state = :initial
        # brace depth inside the innermost "#{ ... }" block
        depth = nil
        # saved [state, depth, heredocs] of the enclosing string for each
        # open "#{ ... }" block
        inline_block_stack = []

        patterns = Patterns  # avoid constant lookup

        until eos?
          match = nil
          kind = nil

          if state.instance_of? patterns::StringState
            # {{{
            # Everything up to the next special character is content.
            match = scan_until(state.pattern) || scan_until(/\z/)
            tokens << [match, :content] unless match.empty?
            break if eos?

            if state.heredoc and self[1]  # end of heredoc
              match = getch.to_s
              match << scan_until(/$/) unless eos?
              tokens << [match, :delimiter]
              tokens << [:close, state.type]
              state = state.next_state
              next
            end

            case match = getch

            when state.delim
              if state.paren
                state.paren_depth -= 1
                if state.paren_depth > 0
                  tokens << [match, :nesting_delimiter]
                  next
                end
              end
              tokens << [match, :delimiter]
              if state.type == :regexp and not eos?
                modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
                tokens << [modifiers, :modifier] unless modifiers.empty?
              end
              tokens << [:close, state.type]
              value_expected = false
              state = state.next_state

            when '\\'
              if state.interpreted
                if esc = scan(/ #{patterns::ESCAPE} /ox)
                  tokens << [match + esc, :char]
                else
                  tokens << [match, :error]
                end
              else
                # Not interpreted: only the delimiter and the backslash
                # itself can be escaped.
                case m = getch
                when state.delim, '\\'
                  tokens << [match + m, :char]
                when nil
                  tokens << [match, :error]
                else
                  tokens << [match + m, :content]
                end
              end

            when '#'
              case peek(1)
              when '{'
                # Remember the string state and scan the block as code.
                inline_block_stack << [state, depth, heredocs]
                value_expected = true
                state = :initial
                depth = 1
                tokens << [:open, :inline]
                tokens << [match + getch, :inline_delimiter]
              when '$', '@'
                tokens << [match, :escape]
                last_state = state  # scan one token as normal code, then return here
                state = :initial
              else
                raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
              end

            when state.paren
              state.paren_depth += 1
              tokens << [match, :nesting_delimiter]

            when /#{patterns::REGEXP_SYMBOLS}/ox
              tokens << [match, :function]

            else
              raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

            end
            next
            # }}}
          else
            # {{{
            if match = scan(/[ \t\f]+/)
              kind = :space
              match << scan(/\s*/) unless eos? or heredocs
              tokens << [match, kind]
              next

            elsif match = scan(/\\?\n/)
              kind = :space
              if match == "\n"
                value_expected = true  # FIXME not quite true
                state = :initial if state == :undef_comma_expected
              end
              if heredocs
                unscan  # heredoc scanning needs \n at start
                state = heredocs.shift
                tokens << [:open, state.type]
                heredocs = nil if heredocs.empty?
                next
              else
                match << scan(/\s*/) unless eos?
              end
              tokens << [match, kind]
              next

            elsif match = scan(/\#.*/) or
              ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
                kind = :comment
                value_expected = true
                tokens << [match, kind]
                next

            elsif state == :initial

              # IDENTS #
              if match = scan(/#{patterns::METHOD_NAME}/o)
                if last_token_dot
                  kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
                else
                  kind = patterns::IDENT_KIND[match]
                  if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
                    kind = :constant
                  elsif kind == :reserved
                    state = patterns::DEF_NEW_STATE[match]
                  end
                end
                ## experimental!
                value_expected = :set if
                  patterns::REGEXP_ALLOWED[match] or check(/#{patterns::VALUE_FOLLOWS}/o)

              elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)
                kind = :ident
                value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

              # OPERATORS #
              elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
                if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
                  value_expected = :set
                end
                # Group 1 is only set for "." and "::".
                last_token_dot = :set if self[1]
                kind = :operator
                unless inline_block_stack.empty?
                  case match
                  when '{'
                    depth += 1
                  when '}'
                    depth -= 1
                    if depth == 0  # closing brace of inline block reached
                      state, depth, heredocs = inline_block_stack.pop
                      tokens << [match, :inline_delimiter]
                      kind = :inline
                      match = :close
                    end
                  end
                end

              elsif match = scan(/ ['"] /mx)
                tokens << [:open, :string]
                kind = :delimiter
                state = patterns::StringState.new :string, match == '"', match  # important for streaming

              elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
                kind = :instance_variable

              elsif value_expected and match = scan(/\//)
                tokens << [:open, :regexp]
                kind = :delimiter
                interpreted = true
                state = patterns::StringState.new :regexp, interpreted, match

              elsif match = scan(/#{patterns::NUMERIC}/o)
                # Group 1 is only set if a float suffix was matched.
                kind = if self[1] then :float else :integer end

              elsif match = scan(/#{patterns::SYMBOL}/o)
                case delim = match[1]
                when ?', ?"
                  tokens << [:open, :symbol]
                  tokens << [':', :symbol]
                  match = delim.chr
                  kind = :delimiter
                  state = patterns::StringState.new :symbol, delim == ?", match
                else
                  kind = :symbol
                end

              elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
                value_expected = :set
                kind = :operator

              elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
                indented = self[1] == '-'
                quote = self[3]
                delim = self[quote ? 4 : 2]
                kind = patterns::QUOTE_TO_TYPE[quote]
                tokens << [:open, kind]
                tokens << [match, :delimiter]
                match = :close
                # The body is scanned later, after the end of this line.
                heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
                heredocs ||= []  # create heredocs if empty
                heredocs << heredoc

              elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
                # The block parameter carries the unknown key into the
                # message; without it the fallback would fail on an
                # undefined local variable.
                kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do |k|
                  raise_inspect 'Unknown fancy string: %%%p' % k, tokens
                end
                tokens << [:open, kind]
                state = patterns::StringState.new kind, interpreted, self[2]
                kind = :delimiter

              elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
                kind = :integer

              elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
                value_expected = :set
                kind = :operator

              elsif match = scan(/`/)
                if last_token_dot
                  kind = :operator
                else
                  tokens << [:open, :shell]
                  kind = :delimiter
                  state = patterns::StringState.new :shell, true, match
                end

              elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
                kind = :global_variable

              elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
                kind = :class_variable

              else
                kind = :error
                match = getch

              end

            elsif state == :def_expected
              state = :initial
              if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
                kind = :method
              else
                next
              end

            elsif state == :undef_expected
              state = :undef_comma_expected
              if match = scan(/#{patterns::METHOD_NAME_EX}/o)
                kind = :method
              elsif match = scan(/#{patterns::SYMBOL}/o)
                case delim = match[1]
                when ?', ?"
                  tokens << [:open, :symbol]
                  tokens << [':', :symbol]
                  match = delim.chr
                  kind = :delimiter
                  state = patterns::StringState.new :symbol, delim == ?", match
                  state.next_state = :undef_comma_expected
                else
                  kind = :symbol
                end
              else
                state = :initial
                next
              end

            elsif state == :undef_comma_expected
              if match = scan(/,/)
                kind = :operator
                state = :undef_expected
              else
                state = :initial
                next
              end

            elsif state == :module_expected
              if match = scan(/<</)
                kind = :operator
              else
                state = :initial
                if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
                  kind = :class
                else
                  next
                end
              end

            end
            # }}}

            # A flag set to :set by this token stays true for the next
            # token; a flag that was merely true before is cleared.
            value_expected = value_expected == :set
            last_token_dot = last_token_dot == :set

            if $DEBUG and not kind
              raise_inspect 'Error token %p in line %d' %
                [[match, kind], line], tokens, state
            end
            raise_inspect 'Empty token', tokens unless match

            tokens << [match, kind]

            if last_state
              state = last_state
              last_state = nil
            end
          end
        end

        # Close everything that is still open at the end of the input.
        inline_block_stack << [state] if state.is_a? patterns::StringState
        until inline_block_stack.empty?
          this_block = inline_block_stack.pop
          tokens << [:close, :inline] if this_block.size > 1
          state = this_block.first
          tokens << [:close, state.type]
        end

        tokens
      end

    end

  end
end
# vim:fdm=marker

View File

@ -0,0 +1,230 @@
module CodeRay
module Scanners
# Word lists, regular expressions and the StringState class that are
# used by the Ruby scanner.
module Ruby::Patterns # :nodoc:
RESERVED_WORDS = %w[
and def end in or unless begin
defined? ensure module redo super until
BEGIN break do next rescue then
when END case else for retry
while alias class elsif if not return
undef yield
]
DEF_KEYWORDS = %w[ def ]
UNDEF_KEYWORDS = %w[ undef ]
MODULE_KEYWORDS = %w[class module]
# Scanner state that follows a reserved word; :initial for all words
# that are not listed.
DEF_NEW_STATE = WordList.new(:initial).
add(DEF_KEYWORDS, :def_expected).
add(UNDEF_KEYWORDS, :undef_expected).
add(MODULE_KEYWORDS, :module_expected)
IDENTS_ALLOWING_REGEXP = %w[
and or not while until unless if then elsif when sub sub! gsub gsub!
scan slice slice! split
]
# :set for the words after which a value (like a regexp) may follow,
# false for all other words.
REGEXP_ALLOWED = WordList.new(false).
add(IDENTS_ALLOWING_REGEXP, :set)
PREDEFINED_CONSTANTS = %w[
nil true false self
DATA ARGV ARGF __FILE__ __LINE__
]
# Token kind of a word; :ident for all words that are not listed.
IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_CONSTANTS, :pre_constant)
IDENT = /[a-z_][\w_]*/i
METHOD_NAME = / #{IDENT} [?!]? /ox
METHOD_NAME_OPERATOR = /
\*\*? # multiplication and power
| [-+]@? # plus, minus
| [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
| \[\]=? # array getter and setter
| << | >> # append or shift left, shift right
| <=?>? | >=? # comparison, rocket operator
| ===? # simple equality and case equality
/ox
# Method names as they may appear after "def" or "undef": identifiers
# (also setters ending in "=") and operator names.
METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
INSTANCE_VARIABLE = / @ #{IDENT} /ox
CLASS_VARIABLE = / @@ #{IDENT} /ox
OBJECT_VARIABLE = / @@? #{IDENT} /ox
GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
# Token kind of a quoted heredoc, selected by its quote character;
# :string for every other key (also for nil, i.e. no quote).
QUOTE_TO_TYPE = {
'`' => :shell,
'/'=> :regexp,
}
QUOTE_TO_TYPE.default = :string
REGEXP_MODIFIERS = /[mixounse]*/
REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/
DECIMAL = /\d+(?:_\d+)*/
OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
BINARY = /0b[01]+(?:_[01]+)*/
EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
# The empty group is only set if a float suffix was matched; the
# scanner uses it to tell floats from integers.
FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
NUMERIC = / [-+]? (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox
# A symbol; for :"..." and :'...' only the colon and the opening quote
# are matched.
SYMBOL = /
:
(?:
#{METHOD_NAME_EX}
| #{PREFIX_VARIABLE}
| ['"]
)
/ox
# TODO investigate \M, \c and \C escape sequences
# (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
# assert_equal(225, ?\M-a)
# assert_equal(129, ?\M-\C-a)
# What may follow a backslash in an interpreted string.
ESCAPE = /
[abefnrstv]
| M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
| [0-7]{1,3}
| x[0-9A-Fa-f]{1,2}
| .
/mx
# A character literal like ?a or ?\n.
CHARACTER = /
\?
(?:
[^\s\\]
| \\ #{ESCAPE}
)
/mx
# NOTE: This is not completely correct, but
# nobody needs heredoc delimiters ending with \n.
HEREDOC_OPEN = /
<< (-)? # $1 = float
(?:
( [A-Za-z_0-9]+ ) # $2 = delim
|
( ["'`\/] ) # $3 = quote, type
( [^\n]*? ) \3 # $4 = delim
)
/mx
# A =begin ... =end block; it ends at the end of the input if there is
# no =end line.
RUBYDOC = /
=begin (?!\S)
.*?
(?: \Z | ^=end (?!\S) [^\n]* )
/mx
# Everything from an __END__ line up to the end of the input or to a
# line starting with "#CODE".
DATA = /
__END__$
.*?
(?: \Z | (?=^\#CODE) )
/mx
# Checks for a valid value to follow. This enables
# fancy_allowed in method calls.
VALUE_FOLLOWS = /
\s+
(?:
[%\/][^\s=]
|
<<-?\S
|
#{CHARACTER}
)
/x
RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
# FIXME: \s and = are only a workaround, they are still allowed
# as delimiters.
FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx
FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
# Maps the type letter of a %-literal to [token kind, interpreted].
# The empty string stands for a literal without type letter.
FancyStringType = {
'q' => [:string, false],
'Q' => [:string, true],
'r' => [:regexp, true],
's' => [:symbol, false],
'x' => [:shell, true]
}
FancyStringType['w'] = FancyStringType['q']
FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
# The state of the scanner while it is inside a string-like literal.
#
# type::        token kind of the literal
# interpreted:: whether the initializer's +interpreted+ argument was set
#               (selects the patterns that also stop at "#{", "#$", "#@")
# delim::       closing delimiter; nil for heredocs
# heredoc::     false, or the heredoc style passed to the initializer
# paren::       opening delimiter, if the literal uses a nesting pair
# paren_depth:: current nesting depth of such a pair
# pattern::     matches at the next position that needs special handling
# next_state::  state to continue with when the literal is closed
class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
:paren, :paren_depth, :pattern, :next_state
CLOSING_PAREN = Hash[ *%w[
( )
[ ]
< >
{ }
] ]
CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with <<
OPENING_PAREN = CLOSING_PAREN.invert
# Cache of the patterns for normal literals; the key is
# [delim, interpreted], the pattern is built on first access.
STRING_PATTERN = Hash.new { |h, k|
delim, interpreted = *k
delim_pattern = Regexp.escape(delim.dup)
# For a nesting pair both delimiters are special characters.
if closing_paren = CLOSING_PAREN[delim]
delim_pattern << Regexp.escape(closing_paren)
end
special_escapes =
case interpreted
when :regexp_symbols
'| ' + REGEXP_SYMBOLS.source
when :words
'| \s'
end
h[k] =
if interpreted and not delim == '#'
/ (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
else
/ (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
end
}
# Cache of the patterns for heredocs; the key is
# [delim, interpreted, indented], the pattern is built on first access.
HEREDOC_PATTERN = Hash.new { |h, k|
delim, interpreted, indented = *k
delim_pattern = Regexp.escape(delim.dup)
# The delimiter has to stand on a line of its own; leading blanks are
# only accepted for indented heredocs.
delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
h[k] =
if interpreted
/ (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc
else
/ (?= #{delim_pattern}() | \\ ) /mx
end
}
# Creates the state for a literal of the given +kind+ that was opened
# with +delim+. For heredocs, +heredoc+ is :indented or another true
# value and +delim+ is the delimiter word.
def initialize kind, interpreted, delim, heredoc = false
if heredoc
pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
delim = nil
else
pattern = STRING_PATTERN[ [delim, interpreted] ]
if paren = CLOSING_PAREN[delim]
# The literal is closed by the closing bracket; the opening one
# is kept to count nested pairs.
delim, paren = paren, delim
paren_depth = 1
end
end
super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
end
end unless defined? StringState
end
end
end

View File

@ -0,0 +1,142 @@
module CodeRay
module Scanners
# Scheme scanner for CodeRay (by closure).
# Thanks to murphy for putting CodeRay into public.
class Scheme < Scanner
register_for :scheme
file_extension :scm
CORE_FORMS = %w[
lambda let let* letrec syntax-case define-syntax let-syntax
letrec-syntax begin define quote if or and cond case do delay
quasiquote set! cons force call-with-current-continuation call/cc
]
# Token kind of an identifier; :ident for all words that are not listed.
IDENT_KIND = CaseIgnoringWordList.new(:ident).
add(CORE_FORMS, :reserved)
#IDENTIFIER_INITIAL = /[a-z!@\$%&\*\/\:<=>\?~_\^]/i
#IDENTIFIER_SUBSEQUENT = /#{IDENTIFIER_INITIAL}|\d|\.|\+|-/
#IDENTIFIER = /#{IDENTIFIER_INITIAL}#{IDENTIFIER_SUBSEQUENT}*|\+|-|\.{3}/
IDENTIFIER = /[a-zA-Z!@$%&*\/:<=>?~_^][\w!@$%&*\/:<=>?~^.+\-]*|[+-]|\.\.\./
# The following patterns build up the number syntax, once for each of
# the radixes 10, 16, 8 and 2.
DIGIT = /\d/
DIGIT10 = DIGIT
DIGIT16 = /[0-9a-f]/i
DIGIT8 = /[0-7]/
DIGIT2 = /[01]/
RADIX16 = /\#x/i
RADIX8 = /\#o/i
RADIX2 = /\#b/i
RADIX10 = /\#d/i
EXACTNESS = /#i|#e/i
SIGN = /[\+-]?/
EXP_MARK = /[esfdl]/i
EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
SUFFIX = /#{EXP}?/
# The radix prefix is optional for decimal numbers only.
PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
# Unsigned integers; the digits may be followed by any number of "#".
UINT10 = /#{DIGIT10}+#*/
UINT16 = /#{DIGIT16}+#*/
UINT8 = /#{DIGIT8}+#*/
UINT2 = /#{DIGIT2}+#*/
DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
# Unsigned reals: fractions and integers; decimals only for radix 10.
UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
REAL10 = /#{SIGN}#{UREAL10}/
REAL16 = /#{SIGN}#{UREAL16}/
REAL8 = /#{SIGN}#{UREAL8}/
REAL2 = /#{SIGN}#{UREAL2}/
IMAG10 = /i|#{UREAL10}i/
IMAG16 = /i|#{UREAL16}i/
IMAG8 = /i|#{UREAL8}i/
IMAG2 = /i|#{UREAL2}i/
COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
NUM10 = /#{PREFIX10}?#{COMPLEX10}/
NUM16 = /#{PREFIX16}#{COMPLEX16}/
NUM8 = /#{PREFIX8}#{COMPLEX8}/
NUM2 = /#{PREFIX2}#{COMPLEX2}/
# A number in any of the four radixes.
NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
private
# Scans the whole input, appends the tokens to +tokens+ and returns it.
# +options+ is part of the scanner interface and is not used here.
#
# There are two states: :initial (code) and :string (inside a string
# literal).
def scan_tokens tokens,options
state = :initial
ident_kind = IDENT_KIND
until eos?
kind = match = nil
case state
when :initial
if scan(/ \s+ | \\\n /x)
kind = :space
# quote, parentheses, brackets and the start of a vector "#("
elsif scan(/['\(\[\)\]]|#\(/)
kind = :operator_fat
elsif scan(/;.*/)
kind = :comment
# character literals like #\a, #\space or #\newline
elsif scan(/#\\(?:newline|space|.?)/)
kind = :char
elsif scan(/#[ft]/)
kind = :pre_constant
# Identifiers are tried before numbers, so a single "+" or "-" is
# scanned as an identifier.
elsif scan(/#{IDENTIFIER}/o)
kind = ident_kind[matched]
elsif scan(/\./)
kind = :operator
elsif scan(/"/)
tokens << [:open, :string]
state = :string
tokens << ['"', :delimiter]
next
elsif scan(/#{NUM}/o) and not matched.empty?
kind = :integer
elsif getch
kind = :error
end
when :string
# plain content, or a backslash together with the character after it
if scan(/[^"\\]+/) or scan(/\\.?/)
kind = :content
elsif scan(/"/)
tokens << ['"', :delimiter]
tokens << [:close, :string]
state = :initial
next
else
raise_inspect "else case \" reached; %p not handled." % peek(1),
tokens, state
end
else
raise "else case reached"
end
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens, state unless match
tokens << [match, kind]
end # until eos
# Close a string that is still open at the end of the input.
if state == :string
tokens << [:close, :string]
end
tokens
end #scan_tokens
end #class
end #module scanners
end #module coderay

View File

@ -0,0 +1,18 @@
module CodeRay
module Scanners
# The HTML scanner is the superclass of the XML scanner, so it is
# loaded first.
load :html
# XML Scanner
#
# $Id$
#
# Currently this is the same scanner as Scanners::HTML.
class XML < HTML
# The class adds nothing to HTML except this registration for :xml.
register_for :xml
end
end
end

View File

@ -0,0 +1,20 @@
module CodeRay
# This module holds the Style class and its subclasses.
#
# See Plugin.
module Styles
extend PluginHost
# NOTE(review): presumably the style plugins are looked up in the
# 'styles' directory next to this file - confirm against PluginHost.
plugin_path File.dirname(__FILE__), 'styles'
# Base class of all styles; Styles is declared as its plugin host.
class Style
extend Plugin
plugin_host Styles
# No default options are defined at this level.
DEFAULT_OPTIONS = { }
end
end
end

View File

@ -0,0 +1,7 @@
module CodeRay
module Styles
# Declares :cycnus as the default plugin of the Styles plugin host.
default :cycnus
end
end

View File

@ -0,0 +1,127 @@
module CodeRay
module Styles
# The "cycnus" style: dark text on a light background.
#
# CSS_MAIN_STYLES holds the rules for the .CodeRay containers,
# TOKEN_COLORS the rules for the classes of the single tokens.
class Cycnus < Style
register_for :cycnus
# These colors are interpolated into CSS_MAIN_STYLES below.
code_background = '#f8f8f8'
numbers_background = '#def'
border_color = 'silver'
normal_color = '#100'
CSS_MAIN_STYLES = <<-MAIN
.CodeRay {
background-color: #{code_background};
border: 1px solid #{border_color};
font-family: 'Courier New', 'Terminal', monospace;
color: #{normal_color};
}
.CodeRay pre { margin: 0px }
div.CodeRay { }
span.CodeRay { white-space: pre; border: 0px; padding: 2px }
table.CodeRay { border-collapse: collapse; width: 100%; padding: 2px }
table.CodeRay td { padding: 2px 4px; vertical-align: top }
.CodeRay .line_numbers, .CodeRay .no {
background-color: #{numbers_background};
color: gray;
text-align: right;
}
.CodeRay .line_numbers tt { font-weight: bold }
.CodeRay .no { padding: 0px 4px }
.CodeRay .code { width: 100% }
ol.CodeRay { font-size: 10pt }
ol.CodeRay li { white-space: pre }
.CodeRay .code pre { overflow: auto }
MAIN
# This heredoc is quoted with single quotes, so nothing in it is
# interpolated.
TOKEN_COLORS = <<-'TOKENS'
.debug { color:white ! important; background:blue ! important; }
.af { color:#00C }
.an { color:#007 }
.av { color:#700 }
.aw { color:#C00 }
.bi { color:#509; font-weight:bold }
.c { color:#666; }
.ch { color:#04D }
.ch .k { color:#04D }
.ch .dl { color:#039 }
.cl { color:#B06; font-weight:bold }
.co { color:#036; font-weight:bold }
.cr { color:#0A0 }
.cv { color:#369 }
.df { color:#099; font-weight:bold }
.di { color:#088; font-weight:bold }
.dl { color:black }
.do { color:#970 }
.ds { color:#D42; font-weight:bold }
.e { color:#666; font-weight:bold }
.en { color:#800; font-weight:bold }
.er { color:#F00; background-color:#FAA }
.ex { color:#F00; font-weight:bold }
.fl { color:#60E; font-weight:bold }
.fu { color:#06B; font-weight:bold }
.gv { color:#d70; font-weight:bold }
.hx { color:#058; font-weight:bold }
.i { color:#00D; font-weight:bold }
.ic { color:#B44; font-weight:bold }
.il { background: #eee }
.il .il { background: #ddd }
.il .il .il { background: #ccc }
.il .idl { font-weight: bold; color: #888 }
.in { color:#B2B; font-weight:bold }
.iv { color:#33B }
.la { color:#970; font-weight:bold }
.lv { color:#963 }
.oc { color:#40E; font-weight:bold }
.of { color:#000; font-weight:bold }
.op { }
.pc { color:#038; font-weight:bold }
.pd { color:#369; font-weight:bold }
.pp { color:#579 }
.pt { color:#339; font-weight:bold }
.r { color:#080; font-weight:bold }
.rx { background-color:#fff0ff }
.rx .k { color:#808 }
.rx .dl { color:#404 }
.rx .mod { color:#C2C }
.rx .fu { color:#404; font-weight: bold }
.s { background-color:#fff0f0 }
.s .s { background-color:#ffe0e0 }
.s .s .s { background-color:#ffd0d0 }
.s .k { color:#D20 }
.s .dl { color:#710 }
.sh { background-color:#f0fff0 }
.sh .k { color:#2B2 }
.sh .dl { color:#161 }
.sy { color:#A60 }
.sy .k { color:#A60 }
.sy .dl { color:#630 }
.ta { color:#070 }
.tf { color:#070; font-weight:bold }
.ts { color:#D70; font-weight:bold }
.ty { color:#339; font-weight:bold }
.v { color:#036 }
.xt { color:#444 }
TOKENS
end
end
end

View File

@ -0,0 +1,119 @@
module CodeRay
module Styles
# Murphy style: light gray text on a dark navy (#001129) background.
#
# The class announces itself with register_for :murphy and exposes two
# CSS fragments as constants: CSS_MAIN_STYLES (container layout) and
# TOKEN_COLORS (one rule per abbreviated token class).
class Murphy < Style
register_for :murphy
# Base colors. These locals are only used for interpolation into
# CSS_MAIN_STYLES; the line-number column reuses the code background.
code_background = '#001129'
numbers_background = code_background
border_color = 'silver'
normal_color = '#C0C0C0'
# Layout rules for the .CodeRay container in its div, span, table and ol
# variants. The heredoc tag is unquoted, so the #{...} placeholders are
# replaced with the colors defined above when the class body is evaluated.
CSS_MAIN_STYLES = <<-MAIN
.CodeRay {
background-color: #{code_background};
border: 1px solid #{border_color};
font-family: 'Courier New', 'Terminal', monospace;
color: #{normal_color};
}
.CodeRay pre { margin: 0px; }
div.CodeRay { }
span.CodeRay { white-space: pre; border: 0px; padding: 2px; }
table.CodeRay { border-collapse: collapse; width: 100%; padding: 2px; }
table.CodeRay td { padding: 2px 4px; vertical-align: top; }
.CodeRay .line_numbers, .CodeRay .no {
background-color: #{numbers_background};
color: gray;
text-align: right;
}
.CodeRay .line_numbers tt { font-weight: bold; }
.CodeRay .no { padding: 0px 4px; }
.CodeRay .code { width: 100%; }
ol.CodeRay { font-size: 10pt; }
ol.CodeRay li { white-space: pre; }
.CodeRay .code pre { overflow: auto; }
MAIN
# Color rules keyed by short CSS class names (.c, .s, .fu, ...). The heredoc
# tag is single-quoted, so the text is taken literally without interpolation.
# NOTE(review): several nested rules (.s .s, .s .s .s, .sh) use very light
# backgrounds and .dl is black; on this dark theme that looks carried over
# from a light style -- confirm it is intended.
TOKEN_COLORS = <<-'TOKENS'
.af { color:#00C; }
.an { color:#007; }
.av { color:#700; }
.aw { color:#C00; }
.bi { color:#509; font-weight:bold; }
.c { color:#555; background-color: black; }
.ch { color:#88F; }
.ch .k { color:#04D; }
.ch .dl { color:#039; }
.cl { color:#e9e; font-weight:bold; }
.co { color:#5ED; font-weight:bold; }
.cr { color:#0A0; }
.cv { color:#ccf; }
.df { color:#099; font-weight:bold; }
.di { color:#088; font-weight:bold; }
.dl { color:black; }
.do { color:#970; }
.ds { color:#D42; font-weight:bold; }
.e { color:#666; font-weight:bold; }
.er { color:#F00; background-color:#FAA; }
.ex { color:#F00; font-weight:bold; }
.fl { color:#60E; font-weight:bold; }
.fu { color:#5ed; font-weight:bold; }
.gv { color:#f84; }
.hx { color:#058; font-weight:bold; }
.i { color:#66f; font-weight:bold; }
.ic { color:#B44; font-weight:bold; }
.il { }
.in { color:#B2B; font-weight:bold; }
.iv { color:#aaf; }
.la { color:#970; font-weight:bold; }
.lv { color:#963; }
.oc { color:#40E; font-weight:bold; }
.of { color:#000; font-weight:bold; }
.op { }
.pc { color:#08f; font-weight:bold; }
.pd { color:#369; font-weight:bold; }
.pp { color:#579; }
.pt { color:#66f; font-weight:bold; }
.r { color:#5de; font-weight:bold; }
.rx { background-color:#221133; }
.rx .k { color:#f8f; }
.rx .dl { color:#f0f; }
.rx .mod { color:#f0b; }
.rx .fu { color:#404; font-weight: bold; }
.s { background-color:#331122; }
.s .s { background-color:#ffe0e0; }
.s .s .s { background-color:#ffd0d0; }
.s .k { color:#F88; }
.s .dl { color:#f55; }
.sh { background-color:#f0fff0; }
.sh .k { color:#2B2; }
.sh .dl { color:#161; }
.sy { color:#Fc8; }
.sy .k { color:#Fc8; }
.sy .dl { color:#F84; }
.ta { color:#070; }
.tf { color:#070; font-weight:bold; }
.ts { color:#D70; font-weight:bold; }
.ty { color:#339; font-weight:bold; }
.v { color:#036; }
.xt { color:#444; }
TOKENS
end
end
end

View File

@ -0,0 +1,71 @@
module CodeRay
  class Tokens
    # Maps a token kind (Symbol) to the short CSS class name used for it.
    #
    # Kinds without an entry fall back to their own name as a String; the
    # fallback is stored in the hash on first lookup. Kinds mapped to
    # :NO_HIGHLIGHT carry that marker symbol instead of a class name.
    ClassOfKind = Hash.new { |classes, kind| classes[kind] = kind.to_s }

    abbreviations = {
      :attribute_name => 'an',
      :attribute_name_fat => 'af',
      :attribute_value => 'av',
      :attribute_value_fat => 'aw',
      :bin => 'bi',
      :char => 'ch',
      :class => 'cl',
      :class_variable => 'cv',
      :color => 'cr',
      :comment => 'c',
      :constant => 'co',
      :content => 'k',
      :definition => 'df',
      :delimiter => 'dl',
      :directive => 'di',
      :doc => 'do',
      :doc_string => 'ds',
      :entity => 'en',
      :error => 'er',
      :escape => 'e',
      :exception => 'ex',
      :float => 'fl',
      :function => 'fu',
      :global_variable => 'gv',
      :hex => 'hx',
      :include => 'ic',
      :inline => 'il',
      :inline_delimiter => 'idl',
      :instance_variable => 'iv',
      :integer => 'i',
      :interpreted => 'in',
      :label => 'la',
      :local_variable => 'lv',
      :modifier => 'mod',
      :oct => 'oc',
      :operator_fat => 'of',
      :pre_constant => 'pc',
      :pre_type => 'pt',
      :predefined => 'pd',
      :preprocessor => 'pp',
      :regexp => 'rx',
      :reserved => 'r',
      :shell => 'sh',
      :string => 's',
      :symbol => 'sy',
      :tag => 'ta',
      :tag_fat => 'tf',
      :tag_special => 'ts',
      :type => 'ty',
      :variable => 'v',
      :xml_text => 'xt',
      :ident => :NO_HIGHLIGHT, # 'id'
      :operator => :NO_HIGHLIGHT, # 'op' (the 'op' class is currently disabled)
      :space => :NO_HIGHLIGHT, # 'sp'
      :plain => :NO_HIGHLIGHT
    }
    ClassOfKind.update abbreviations

    # Kinds rendered exactly like functions.
    [:method, :procedure].each do |kind|
      ClassOfKind[kind] = ClassOfKind[:function]
    end

    # Kinds rendered exactly like delimiters. :escape is listed here on
    # purpose: it replaces the 'e' entry from the table above.
    [:close, :open, :nesting_delimiter, :escape].each do |kind|
      ClassOfKind[kind] = ClassOfKind[:delimiter]
    end

    # An :error default for unknown kinds is deliberately not installed:
    #ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!'
  end
end

View File

@ -0,0 +1,383 @@
module CodeRay
# = Tokens
#
# The Tokens class represents a list of tokens returned from
# a Scanner.
#
# A token is not a special object, just a two-element Array
# consisting of
# * the _token_ _text_ (the original source of the token in a String)
# * the _token_ _kind_ (a Symbol representing the type of the token)
#
# A token looks like this:
#
# ['# It looks like this', :comment]
# ['3.1415926', :float]
# ['äöü', :error]
#
# Some scanners also yield some kind of sub-tokens, represented by special
# token texts, namely :open and :close .
#
# The Ruby scanner, for example, splits "a string" into:
#
# [
# [:open, :string],
# ['"', :delimiter],
# ['a string', :content],
# ['"', :delimiter],
# [:close, :string]
# ]
#
# Tokens is also the interface between Scanners and Encoders:
# The input is split and saved into a Tokens object. The Encoder
# then builds the output from this object.
#
# Thus, the syntax below becomes clear:
#
# CodeRay.scan('price = 2.59', :ruby).html
# # the Tokens object is here -------^
#
# See how small it is? ;)
#
# Tokens gives you the power to handle pre-scanned code very easily:
# You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
# that you put in your DB.
#
# Tokens' subclass TokenStream allows streaming to save memory.
class Tokens < Array
  class << self
    # Convert the token to a single record string.
    #
    # Text tokens become "<kind>\t<escaped text>\n"; range tokens (a
    # non-String +text+ such as :open or :close) become
    # ":<text>\t<kind>\t\n".
    #
    # This format is used by Encoders.Tokens.
    # It can be reverted using read_token.
    def write_token text, type
      if text.is_a? String
        "#{type}\t#{escape(text)}\n"
      else
        ":#{text}\t#{type}\t\n"
      end
    end

    # Read a token from a record string created by write_token.
    #
    # Returns a [text, kind] pair, the layout used everywhere else in this
    # class, so that read_token(write_token(text, kind)) == [text, kind].
    def read_token token
      head, tail = token.split("\t", 2)
      tail = tail.to_s
      if head[0, 1] == ':'
        # Range token: ":<action>\t<kind>\t\n".
        [head[1..-1].to_sym, tail.sub(/\t?\n?\z/, '').to_sym]
      else
        # Drop the record terminator, but only when that final newline is
        # not the second half of an escaped newline belonging to the text.
        tail = Regexp.last_match(1) if tail =~ /\A((?:[^\\]|\\.)*)\n\z/m
        [unescape(tail), head.to_sym]
      end
    end

    # Escapes a string for use in write_token: every newline and every
    # backslash gets a backslash in front of it.
    def escape text
      text.gsub(/[\n\\]/, '\\\\\&')
    end

    # Unescapes a string created by escape.
    def unescape text
      text.gsub(/\\[\n\\]/) { |m| m[1,1] }
    end
  end

  # Whether the object is a TokenStream.
  #
  # Returns false.
  def stream?
    false
  end

  # Iterates over all tokens, yielding text and kind.
  #
  # If a filter is given, only tokens of that kind are yielded.
  def each kind_filter = nil, &block
    return super(&block) unless kind_filter
    super() do |text, kind|
      yield text, kind if kind == kind_filter
    end
  end

  # Iterates over all text tokens.
  # Range tokens like [:open, :string] are left out.
  #
  # Example:
  #   tokens.each_text_token { |text, kind| text.replace html_escape(text) }
  def each_text_token
    each do |text, kind|
      next unless text.is_a? ::String
      yield text, kind
    end
  end

  # Encode the tokens using encoder.
  #
  # encoder can be
  # * a symbol like :html or :statistic
  # * an Encoder class
  # * an Encoder object
  #
  # options are passed to the encoder.
  def encode encoder, options = {}
    unless encoder.is_a? Encoders::Encoder
      # A Class is instantiated directly; anything else is treated as a
      # name and looked up through Encoders[].
      encoder_class = encoder.is_a?(Class) ? encoder : Encoders[encoder]
      encoder = encoder_class.new options
    end
    encoder.encode_tokens self, options
  end

  # Turn into a string using Encoders::Text.
  #
  # +options+ are passed to the encoder if given.
  def to_s options = {}
    encode :text, options
  end

  # Redirects unknown methods to encoder calls.
  #
  # For example, if you call +tokens.html+, the HTML encoder
  # is used to highlight the tokens.
  def method_missing meth, options = {}
    Encoders[meth].new(options).encode_tokens self
  end

  # Returns the tokens compressed by joining consecutive
  # text tokens of the same kind.
  #
  # The receiver and its strings are left untouched; joined texts are
  # built in fresh String objects.
  #
  # This can not be undone, but should yield the same output
  # in most Encoders. It basically makes the output smaller.
  #
  # Combined with dump, it saves space for the cost of time.
  #
  # If the scanner is written carefully, this is not required -
  # for example, consecutive //-comment lines could already be
  # joined in one comment token by the Scanner.
  def optimize
    print ' Tokens#optimize: before: %d - ' % size if $DEBUG
    last_kind = last_text = nil
    optimized = self.class.new
    each do |text, kind|
      if text.is_a? ::String
        if last_text && kind == last_kind
          last_text << text
        else
          optimized << [last_text, last_kind] if last_text
          # Copy, so that appending later never alters the receiver's token.
          last_text = text.dup
          last_kind = kind
        end
      else
        # A range token ends the current run of text tokens.
        optimized << [last_text, last_kind] if last_text
        last_kind = last_text = nil
        optimized << [text, kind]
      end
    end
    optimized << [last_text, last_kind] if last_text
    if $DEBUG
      saved = size - optimized.size
      percent = size.zero? ? 0.0 : 100.0 * saved / size
      print 'after: %d (%d saved = %2.0f%%)' % [optimized.size, saved, percent]
    end
    optimized
  end

  # Compact the object itself; see optimize.
  def optimize!
    replace optimize
  end

  # Returns a copy in which every :open token has a matching :close one.
  #
  # * A :close for a kind that is not currently open is dropped.
  # * A :close for a kind opened further out first closes everything
  #   that was opened after it.
  # * Ranges still open at the end are closed, innermost first.
  def fix
    tokens = self.class.new
    # Kinds of the ranges that are currently open, outermost first.
    opened = []
    each do |token, kind|
      if token == :open
        opened.push kind
      elsif token == :close
        next unless opened.include? kind
        tokens << [:close, opened.pop] until opened.last == kind
        opened.pop
      end
      tokens << [token, kind]
    end
    tokens << [:close, opened.pop] until opened.empty?
    tokens
  end

  # Repair the object itself; see fix.
  def fix!
    replace fix
  end

  # Makes sure that:
  # - newlines are single tokens
  #   (which means all other tokens are single-line)
  # - there are no open tokens at the end of the line
  #
  # This makes it simple for encoders that work line-oriented,
  # like HTML with list-style numeration.
  #
  # Not implemented yet; always raises NotImplementedError.
  def split_into_lines
    raise NotImplementedError
  end

  # In-place variant of split_into_lines.
  def split_into_lines!
    replace split_into_lines
  end

  # Dumps the object into a String that can be saved
  # in files or databases.
  #
  # The dump is created with Marshal.dump and then compressed by calling
  # #gzip on the resulting String (presumably provided by
  # coderay/helpers/gzip_simple, which is required here).
  #
  # The returned String object includes Undumping
  # so it has an #undump method. See Tokens.load.
  #
  # You can configure the level of compression,
  # but the default value 7 should be what you want
  # in most cases as it is a good compromise between
  # speed and compression rate.
  def dump gzip_level = 7
    require 'coderay/helpers/gzip_simple'
    dump = Marshal.dump self
    dump = dump.gzip gzip_level
    dump.extend Undumping
  end

  # The total size of all text tokens.
  # Should be equal to the input size before
  # scanning.
  def text_size
    total = 0
    each_text_token { |text, _kind| total += text.size }
    total
  end

  # The texts of all text tokens joined into one String.
  # Should be equal to the input before scanning.
  def text
    map { |t, k| t if t.is_a? ::String }.join
  end

  # Include this module to give an object an #undump
  # method.
  #
  # The string returned by Tokens#dump is extended with Undumping.
  module Undumping
    # Calls Tokens.load with itself.
    def undump
      Tokens.load self
    end
  end

  # Restores an object from a dump: the string is unzipped with #gunzip
  # first, then passed to Marshal.load.
  #
  # The result is commonly a Tokens object, but
  # this is not guaranteed.
  #
  # SECURITY: Marshal.load can instantiate arbitrary objects. Only load
  # dumps that come from a trusted source.
  def Tokens.load dump
    require 'coderay/helpers/gzip_simple'
    Marshal.load dump.gunzip
  end
end
# = TokenStream
#
# The TokenStream class is a fake Array without elements.
#
# It redirects the method << to a block given at creation.
#
# This allows scanners and Encoders to use streaming (no
# tokens are saved, the input is highlighted the same time it
# is scanned) with the same code.
#
# See CodeRay.encode_stream and CodeRay.scan_stream
class TokenStream < Tokens
  # Whether the object is a TokenStream.
  #
  # Returns true.
  def stream?
    true
  end

  # The Array is empty, but size counts the tokens given by <<.
  attr_reader :size

  # Creates a new TokenStream that calls +block+ whenever
  # its << method is called. The block receives the token that was
  # passed to <<.
  #
  # Raises ArgumentError if no block is given.
  #
  # Example:
  #
  #   require 'coderay'
  #
  #   token_stream = CodeRay::TokenStream.new do |text, kind|
  #     puts 'kind: %s, text size: %d.' % [kind, text.size]
  #   end
  #
  #   token_stream << ['/\d+/', :regexp]
  #   #-> kind: regexp, text size: 5.
  #
  def initialize &block
    raise ArgumentError, 'Block expected for streaming.' unless block
    @callback = block
    @size = 0
  end

  # Calls +block+ with +token+ and increments size.
  # The token itself is not stored.
  #
  # Returns self.
  def << token
    @callback.call token
    @size += 1
    self
  end

  # This method is not implemented due to speed reasons. Use Tokens.
  def text_size
    raise NotImplementedError,
      'This method is not implemented due to speed reasons.'
  end

  # A TokenStream cannot be dumped. Use Tokens.
  #
  # Accepts (and ignores) the optional +gzip_level+ of Tokens#dump, so
  # that a call with a level raises NotImplementedError instead of
  # ArgumentError.
  def dump gzip_level = 7
    raise NotImplementedError, 'A TokenStream cannot be dumped.'
  end

  # A TokenStream cannot be optimized. Use Tokens.
  def optimize
    raise NotImplementedError, 'A TokenStream cannot be optimized.'
  end
end
# Token name abbreviations
require 'coderay/token_classes'
end