LAview.LaTeX-Struct/src/latex-struct/Parser.vala

359 lines
9.6 KiB
Vala

namespace LAview {
/**
* LaTeX Parsers.
*/
namespace Parsers {
/**
* LaTeX Parser Error Type.
*/
public errordomain ParseError {
/**
* Undefined error.
*/
UNDEFINED,
/**
* Can't find the end of a subdoc (begin tag without end tag).
*/
ORPHAN_BEGIN,
/**
* End tag without begin tag.
*/
ORPHAN_END,
/**
* Error in subdoc parsing.
*/
SUBDOC,
}
abstract class Parser : Object {
/* accumulated error text; vprefix_error () prepends every new message to it */
protected string err_str = "";
/* links to group of scanners (borrowed reference, assigned by the constructor) */
protected unowned Array<Link> links = null;
/* escaped TeX document (borrowed reference) */
protected unowned string contents = null;
/* standard GLib Scanner */
protected Scanner scanner = new Scanner (null);
/* tokens_sequence->class table built from the begin tags of the links */
protected Node<uint> tokens = new Node<uint> ();
/* tokens_sequence->class table built from the end tags of the links */
protected Node<uint> stop_tokens = new Node<uint> ();
/* local tokens_sequence->class table */
protected Node<uint> local_tokens_to_class_start = new Node<uint> ();
protected Node<uint> local_tokens_to_class_stop = new Node<uint> ();
/* contents location in global document */
protected size_t line = 0;
protected long position = 0;
/* fifo-queue of tokens */
protected Queue<SymbPos?> symb_fifo = new Queue<SymbPos?> ();
/* one scanned token together with its location inside contents */
protected struct SymbPos {
/* token value reported by the scanner */
public TokenType token;
/* line of the token inside contents (scanner line minus one) */
public uint line;
/* position of the token's first character within its line */
public long pos;
/* offset of the token's first character from the beginning of contents */
public uint abspos;
/* token length: length of the symbol string for registered symbols, otherwise 1 */
public uint length;
}
/* Last symbol position pushed to fifo */
protected SymbPos last_symb_pos;
/* NOTE(review): presumably the position where the current subdoc starts; it is not
used in this part of the file - confirm against the derived parsers */
protected SymbPos subdoc_start;
/* child class and level */
protected uint child_tag = 0;
protected uint child_level = 0;
/* true while count_branches () still has to track the bracket groups of a child */
protected bool in_child_params = false;
/* nesting depth of '{' / '[' groups counted by count_branches () */
protected uint child_param_branch_level = 0;
/* back-slash counter for one-line comments: consecutive '\\' tokens seen by fifo_push () */
protected uint back_slash_counter = 0;
/**
* Creates a parser and configures its scanner for the given links.
*
* @param links table of links; the begin/end tag sequences of every entry except
* entry 0 are registered as scanner symbols and compiled into the token trees
*/
protected Parser (Array<Link> links) {
/* initializing scanner links */
this.links = links;
/* adjust lexing behaviour to suit our needs */
/* no characters are skipped, so whitespace reaches the parser as tokens */
scanner.config.cset_skip_characters = "";
/* identifiers may start with a lower-case letter, a digit or a back-slash */
scanner.config.cset_identifier_first = CharacterSet.a_2_z + CharacterSet.DIGITS + "\\";
scanner.config.cset_identifier_nth = CharacterSet.a_2_z + CharacterSet.A_2_Z + CharacterSet.DIGITS;
/* one-line comments run from '%' to the end of the line */
scanner.config.cpair_comment_single = "%\n";
scanner.config.scan_float = false;
/* report registered symbols through their own token value */
scanner.config.symbol_2_token = true;
scanner.config.scan_string_sq = false; // See bug #448
scanner.config.scan_string_dq = false; // See bug #448
/* no message handler is installed: the handler is reset to null */
scanner.msg_handler = null;
/* register multi-character tags as scanner symbols (token values derive from GLib Quarks) */
load_symbols (links);
/* generate tokens table (tree) */
build_tree (ref tokens, links, false);
/* generate stop_tokens table (tree) */
build_tree (ref stop_tokens, links, true);
/* give the error handler an idea on how the input is named */
scanner.input_name = "TeX text";
}
/**
* Parses a piece of TeX text into a document object.
*
* @param contents text to parse
* @param line presumably the line of //contents// in the global document - confirm against the implementations
* @param position presumably the position of //contents// in the global document - confirm against the implementations
* @return the parsed document
* @throws ParseError when the text cannot be parsed
*/
public abstract IDoc parse (string contents, size_t line, long position) throws ParseError;
/**
 * Prepends a located, formatted error message to err_str.
 *
 * The generated text consists of a "type:line:column: message" header, the
 * source line the symbol belongs to, and a line carrying a '^' marker under
 * the reported column. The previous content of err_str follows it.
 *
 * @param symb_pos location of the offending symbol inside contents
 * @param format printf-style format of the message text
 * @param args arguments consumed by //format//
 */
void vprefix_error (SymbPos symb_pos, string format, va_list args) {
    err_str = "\n" + err_str;
    var line = symb_pos.line;
    long position = symb_pos.pos;
    /* on the first line the column is shifted by the position of contents */
    if (line == 0) {
        position += this.position;
    }

    /* find the offset of the first character of the requested line */
    size_t nlines = 0;
    long i = 0;
    while (contents[i] != '\0' && nlines < line) {
        if ('\n' == contents[i] || '\r' == contents[i]) {
            ++nlines;
        }
        ++i;
    }

    var unparsed_str = contents[i:contents.length].split ("\n"); // FIXME: MacOS newline '\r' characters...

    /* split () yields an empty array for an empty string (error at the very
       end of contents), so fall back to an empty line instead of reading
       a missing first element */
    string cur_line = "";
    if (unparsed_str.length > 0) {
        cur_line = unparsed_str[0];
    }

    /* keep the slice inside the line: the shifted column may point past the
       end of the local line, and an out-of-range slice returns null */
    if (position > cur_line.length) {
        position = cur_line.length;
    }
    if (position < 0) {
        position = 0;
    }

    /* the reported column refers to the compressed (unescaped) text */
    var compressed = cur_line[0:position].compress ();
    position = compressed.length + 1;
    var arrow_str = string.nfill (position - 1, ' ') + "^";
    compressed = cur_line.compress ();
    var str = format.vprintf (args);
    err_str = "%s:%lu:%lu: %s\n%s\n%s\n%s".printf (get_type ().name (),
                                                   this.line + line + 1,
                                                   position,
                                                   str,
                                                   compressed,
                                                   arrow_str,
                                                   err_str);
}
/**
* Variadic front end of vprefix_error ().
*
* @param symb_pos position of the symbol the message refers to
* @param format printf-style format of the message
* @param ... arguments consumed by //format//
*/
protected void prefix_error (SymbPos symb_pos, string format, ...) {
/* collect this method's variadic arguments and forward them */
var list = va_list ();
vprefix_error (symb_pos, format, list);
}
/**
 * Registers the multi-character tags of all links as scanner symbols.
 *
 * Every begin and end tag longer than one character is added to scope 0 of
 * the scanner; its symbol value is the tag's Quark plus TokenType.LAST.
 * Entry 0 of //links// is not visited.
 *
 * @param links table of links providing the begin and end tag sequences
 */
protected void load_symbols (Array<Link> links) {
    for (var link_idx = 1; link_idx < links.length; ++link_idx) {
        unowned string[] begin_tags = links.index (link_idx).begin;
        foreach (unowned string tag in begin_tags) {
            /* single characters are not registered as symbols */
            if (tag.length > 1) {
                scanner.scope_add_symbol (0, tag, (void*)(Quark.from_string (tag) + TokenType.LAST));
            }
        }

        unowned string[] end_tags = links.index (link_idx).end;
        foreach (unowned string tag in end_tags) {
            if (tag.length > 1) {
                scanner.scope_add_symbol (0, tag, (void*)(Quark.from_string (tag) + TokenType.LAST));
            }
        }
    }
}
/**
 * Builds the tree that maps tag token sequences to link indices.
 *
 * Each inner node holds one token of a tag sequence; the node reached by a
 * complete sequence receives a single leaf holding the index of the link.
 * Entry 0 of //links// is not visited.
 *
 * @param tokens receives the newly created tree root
 * @param links table of links providing the tag sequences
 * @param stop_tree true to use the end tags, false to use the begin tags
 */
protected void build_tree (ref Node<uint> tokens, Array<Link> links, bool stop_tree) {
    /* always start from an empty root */
    tokens = new Node<uint> ();
    for (var class_id = 1; class_id < links.length; ++class_id) {
        unowned string[] tag_seq = stop_tree ? links.index (class_id).end : links.index (class_id).begin;
        /* links without tags of the requested kind contribute nothing */
        if (tag_seq.length == 0) {
            continue;
        }

        /* walk down the tree, creating the missing nodes of the sequence */
        unowned Node<uint> cursor = tokens;
        foreach (unowned string symb in tag_seq) {
            assert (symb != null);
            Quark symb_quark;
            if (symb.length > 1) {
                /* multi-character tags use the same value load_symbols () registers */
                symb_quark = Quark.from_string (symb) + TokenType.LAST;
            } else {
                /* single characters are represented by their character code */
                symb_quark = symb[0];
            }

            unowned Node<uint>? next = cursor.find_child (TraverseFlags.NON_LEAVES, symb_quark);
            if (next == null) {
                next = cursor.append_data (symb_quark);
            }
            cursor = next;
        }

        /* check for identical token lists and be sure that we create a leaf for the class id */
        assert (cursor.first_child () == null);
        cursor.append_data (class_id);
    }
}
/**
 * Tracks the bracket nesting of a child's parameters.
 *
 * Does nothing unless in_child_params is set. Otherwise the token at the
 * head of the fifo adjusts child_param_branch_level: '{' and '[' increase
 * it, '}' and ']' decrease it. Once the level is zero and the token after
 * the head does not open another group, in_child_params is cleared.
 */
protected void count_branches () {
    if (!in_child_params) {
        return;
    }

    switch (fifo_peek_head ().token) {
    case TokenType.LEFT_CURLY:
    case TokenType.LEFT_BRACE:
        ++child_param_branch_level;
        break;
    case TokenType.RIGHT_CURLY:
    case TokenType.RIGHT_BRACE:
        /* the counter is unsigned: an unmatched closing bracket must not
           wrap it around, which would keep in_child_params set */
        if (child_param_branch_level > 0) {
            --child_param_branch_level;
        }
        break;
    default:
        break;
    }

    /* all groups are closed and no further group follows */
    if (child_param_branch_level == 0
        && TokenType.LEFT_CURLY != fifo_peek_nth (1).token
        && TokenType.LEFT_BRACE != fifo_peek_nth (1).token) {
        in_child_params = false;
    }
}
/**
 * Checks whether the tokens at the head of the fifo form a known tag.
 *
 * The queued tokens are matched one by one against the paths of the given
 * tree until a node whose first child is a leaf is reached.
 *
 * @param tokens root of the token tree to match against
 * @param tag receives the value of the matched leaf, or 0 without a match
 * @return number of tokens forming the tag, or 0 when there is no match
 */
protected uint fifo_is_tag (Node<uint> tokens, out uint tag) {
    tag = 0;
    uint matched = 0;
    uint depth = 0;
    unowned Node<uint>? node = tokens;

    /* descend while the path exists and real tokens are available */
    while (node != null
           && TokenType.ERROR != fifo_peek_nth (depth).token
           && TokenType.EOF != fifo_peek_nth (depth).token) {
        node = node.find_child (TraverseFlags.NON_LEAVES,
                                fifo_peek_nth (depth).token);
        if (node != null && node.first_child ().is_leaf ()) {
            /* complete sequence: the leaf carries the tag value */
            tag = node.first_child ().data;
            matched = depth + 1;
            break;
        }
        ++depth;
    }

    return matched;
}
/**
 * Removes the token at the head of the fifo.
 *
 * An ERROR or EOF token at the head is left in place.
 */
protected void fifo_pop () {
    var head_token = fifo_peek_head ().token;
    if (head_token == TokenType.ERROR || head_token == TokenType.EOF) {
        return;
    }
    symb_fifo.pop_head ();
}
/**
 * Returns the token at the head of the fifo without removing it.
 *
 * An empty fifo is filled by one call to fifo_push () first.
 *
 * @return the token at the head of the fifo
 */
protected SymbPos fifo_peek_head () {
    var nothing_queued = (symb_fifo.length == 0);
    if (nothing_queued) {
        fifo_push ();
    }
    return symb_fifo.peek_head ();
}
/**
 * Returns the n-th queued token without removing it.
 *
 * Tokens are pushed until the fifo holds at least n + 1 entries or
 * fifo_push () fails; in the latter case the tail of the fifo is returned.
 *
 * @param n zero-based index counted from the head of the fifo
 * @return the n-th token, or the tail when fewer tokens are available
 */
protected SymbPos fifo_peek_nth (uint n) {
    /* read ahead until the requested index exists or nothing more is pushed */
    while (n >= symb_fifo.length) {
        if (!fifo_push ()) {
            break;
        }
    }

    if (n < symb_fifo.length) {
        return symb_fifo.peek_nth (n);
    }
    return symb_fifo.peek_tail ();
}
/**
* Reads the next token from the scanner and appends it to the fifo together
* with its location inside contents.
*
* @return true when a token was appended; false when the scanner reported
* ERROR or EOF (that token is queued only if the fifo is empty)
*/
bool fifo_push () {
/* cursor into contents at the absolute position reached by the previous push */
var p = contents.offset (last_symb_pos.abspos);
scanner.get_next_token ();
/* consecutive back-slash tokens switch one-line comment recognition:
every 4th back-slash enables it, every other 2nd back-slash disables it */
if ('\\' == scanner.token) {
++back_slash_counter;
if (back_slash_counter % 4 == 0)
scanner.config.cpair_comment_single = "%\n";
else if (back_slash_counter % 2 == 0)
scanner.config.cpair_comment_single = null;
} else {
/* NOTE(review): only the counter is reset here; comment recognition is not
re-enabled in this method once it has been switched off - confirm that this is intended */
back_slash_counter = 0;
}
/* look one token ahead; the result is not used directly here.
NOTE(review): presumably done so that the following token is scanned with the
comment setting chosen above - confirm */
scanner.peek_next_token ();
if (scanner.token == TokenType.ERROR
|| scanner.token == TokenType.EOF) {
/* make sure the fifo is never empty: queue the ERROR/EOF token once */
if (symb_fifo.length == 0) {
var sp = SymbPos ();
sp.token = scanner.token;
symb_fifo.push_tail (sp);
}
return false;
}
var sp = SymbPos ();
sp.token = scanner.token;
last_symb_pos.token = scanner.token;
sp.line = scanner.cur_line () - 1;
sp.pos = scanner.cur_position ();
sp.length = 1;
/* move pos back from the scanner position to the first character of the token:
by the symbol string length for tokens that map to a Quark string, otherwise by one */
if (scanner.token > 255 && ((Quark)(scanner.token - TokenType.LAST)).to_string() != null) {
sp.length = ((Quark)(scanner.token - TokenType.LAST)).to_string().length;
sp.pos -= sp.length;
} else if (sp.pos != 0) {
--sp.pos;
}
last_symb_pos.length = sp.length;
/* current scanner's abspos evaluation: walk through contents character by
character until the running line/pos pair reaches the position of the new token */
while (last_symb_pos.line < sp.line
|| last_symb_pos.line == sp.line && last_symb_pos.pos < sp.pos) {
/* both '\n' and '\r' are counted as a line break */
if ('\n' == p[0] || '\r' == p[0]) {
++last_symb_pos.line;
last_symb_pos.pos = 0;
} else {
++last_symb_pos.pos;
}
++last_symb_pos.abspos;
p = p.offset (1);
}
sp.abspos = last_symb_pos.abspos;
symb_fifo.push_tail (sp);
return true;
}
}
}
}