286 lines
5.4 KiB
C
286 lines
5.4 KiB
C
#include "tex_parser.h"
|
|
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include "tex_parser_tags.h"
|
|
#include "tex_parser_const.h"
|
|
#include "zalloc_ext.h"
|
|
#include "zalloc.h"
|
|
#include "c_const.h"
|
|
|
|
/* debug only: trace switch, and <stdio.h> for the trace printf() calls in tex_parse() */
|
|
#define __TEX_PARSER_DEBUG 0
|
|
#include <stdio.h>
|
|
|
|
|
|
int tex_parse(const char *source, size_t len, struct zerror_s *error)
|
|
{
|
|
memset(error, 0, sizeof(*error));
|
|
|
|
if (!len)
|
|
len = strlen(source);
|
|
|
|
enum where_e
|
|
{
|
|
IN_UNDEF = 0, // undefined place
|
|
IN_COMMENT, // any comment
|
|
IN_TAG, // any tag
|
|
IN_TAG_UNKNOWN, // unknown tag
|
|
IN_SPACE, // space, \tag { for example
|
|
IN_TAGPARM, // in \tag{} curly braces
|
|
IN_TAG_BEGIN,
|
|
IN_TAG_CLINE,
|
|
IN_TAG_END,
|
|
IN_TAG_ENDFOOT,
|
|
IN_TAG_ENDHEAD,
|
|
IN_TAG_HLINE,
|
|
IN_TAG_HSPACE,
|
|
IN_TAG_MULTICOLUMN,
|
|
IN_TAG_MULTIROW,
|
|
IN_TAG_TABULARNEWLINE,
|
|
IN_FORMULA, // in $...$
|
|
|
|
} where_stack[MAX_TEX_STACK_LEVEL] = {IN_UNDEF};
|
|
|
|
char *tag = NULL;
|
|
size_t i = 0;
|
|
size_t tex_level = 0;
|
|
|
|
for (i = 0; i < len; i++) {
|
|
|
|
/* stack checking */
|
|
if (tex_level + 1 == MAX_TEX_STACK_LEVEL) {
|
|
error->code = (int)i;
|
|
error->message = "stack overflow";
|
|
return error->code;
|
|
}
|
|
|
|
/* read one character from input stream */
|
|
char c = source[i];
|
|
#ifdef __TEX_PARSER_DEBUG
|
|
printf("tex_level = %lu, c = %c\n", tex_level, c);
|
|
#endif
|
|
|
|
/* looking at where_stack and encountered character */
|
|
switch (where_stack[tex_level]) {
|
|
case IN_UNDEF:
|
|
if (c == '\\') {
|
|
tag = zfree_null(tag);
|
|
where_stack[++tex_level] = IN_TAG;
|
|
|
|
|
|
} else if (isspace(c)) {
|
|
where_stack[++tex_level] = IN_SPACE;
|
|
|
|
} else if (c == '%') {
|
|
where_stack[++tex_level] = IN_COMMENT;
|
|
|
|
} else if (isgraph(c) || c < 0) {
|
|
|
|
} else {
|
|
error->code = (int)i;
|
|
error->message = "unexpected symbol (IN_UDEF)";
|
|
return error->code;
|
|
}
|
|
break;
|
|
|
|
case IN_COMMENT:
|
|
if (c == '\r' || c == '\n') {
|
|
where_stack[tex_level--] = IN_UNDEF;
|
|
}
|
|
break;
|
|
|
|
case IN_TAG:
|
|
if (isalnum(c)) {
|
|
tag = zalloc_append8_str(tag, c);
|
|
|
|
} else if (!tag && (c == '\\' || c == ','
|
|
|| c == '%' || c == '_')) {
|
|
where_stack[tex_level--] = IN_UNDEF;
|
|
|
|
} else if (!tag) {
|
|
error->code = (int)i;
|
|
error->message = "empty tag (IN_TAG)";
|
|
return error->code;
|
|
|
|
} else {
|
|
if (!strcmp(tag, TEX_TAG_BEGIN))
|
|
where_stack[tex_level] = IN_TAG_BEGIN;
|
|
|
|
else if (!strcmp(tag, TEX_TAG_CLINE))
|
|
where_stack[tex_level] = IN_TAG_CLINE;
|
|
|
|
else if (!strcmp(tag, TEX_TAG_END))
|
|
where_stack[tex_level] = IN_TAG_END;
|
|
|
|
else if (!strcmp(tag, TEX_TAG_ENDFOOT))
|
|
where_stack[tex_level] = IN_TAG_ENDFOOT;
|
|
|
|
else if (!strcmp(tag, TEX_TAG_ENDHEAD))
|
|
where_stack[tex_level] = IN_TAG_ENDHEAD;
|
|
|
|
else if (!strcmp(tag, TEX_TAG_HLINE))
|
|
where_stack[tex_level] = IN_TAG_HLINE;
|
|
|
|
else if (!strcmp(tag, TEX_TAG_HSPACE))
|
|
where_stack[tex_level] = IN_TAG_HSPACE;
|
|
|
|
else if (!strcmp(tag, TEX_TAG_MULTICOLUMN))
|
|
where_stack[tex_level] = IN_TAG_MULTICOLUMN;
|
|
|
|
else if (!strcmp(tag, TEX_TAG_MULTIROW))
|
|
where_stack[tex_level] = IN_TAG_MULTIROW;
|
|
|
|
else if (!strcmp(tag, TEX_TAG_TABULARNEWLINE))
|
|
where_stack[tex_level] = IN_TAG_TABULARNEWLINE;
|
|
|
|
else
|
|
where_stack[tex_level] = IN_TAG_UNKNOWN;
|
|
|
|
#ifdef __TEX_PARSER_DEBUG
|
|
printf("\\%s\n", tag);
|
|
#endif
|
|
tag = zfree_null(tag);
|
|
|
|
i--;
|
|
}
|
|
break;
|
|
|
|
case IN_SPACE:
|
|
if (!isspace(c)) {
|
|
where_stack[tex_level--] = IN_UNDEF;
|
|
i--;
|
|
}
|
|
break;
|
|
|
|
case IN_TAGPARM:
|
|
if (c == '{' || c == '[') { // tag params
|
|
where_stack[++tex_level] = IN_TAGPARM;
|
|
|
|
} else if (c == '}' || c == ']') { // end tag params
|
|
where_stack[tex_level--] = IN_UNDEF;
|
|
|
|
} else if (c == '\\') { // new tag
|
|
tag = zfree_null(tag);
|
|
where_stack[++tex_level] = IN_TAG;
|
|
|
|
} else if (isspace(c)) {
|
|
where_stack[++tex_level] = IN_SPACE;
|
|
|
|
} else if (c == '%') {
|
|
where_stack[++tex_level] = IN_COMMENT;
|
|
|
|
} else if (isgraph(c) || c < 0) {
|
|
|
|
} else {
|
|
error->code = (int)i;
|
|
error->message = "unexpected symbol (IN_TAGPARM)";
|
|
return error->code;
|
|
}
|
|
break;
|
|
|
|
case IN_TAG_UNKNOWN:
|
|
case IN_TAG_BEGIN:
|
|
case IN_TAG_CLINE:
|
|
case IN_TAG_END:
|
|
case IN_TAG_ENDFOOT:
|
|
case IN_TAG_ENDHEAD:
|
|
case IN_TAG_HLINE:
|
|
case IN_TAG_HSPACE:
|
|
case IN_TAG_MULTICOLUMN:
|
|
case IN_TAG_MULTIROW:
|
|
case IN_TAG_TABULARNEWLINE:
|
|
switch(where_stack[tex_level]) {
|
|
|
|
case IN_TAG_UNKNOWN:
|
|
|
|
break;
|
|
|
|
case IN_TAG_BEGIN:
|
|
|
|
break;
|
|
|
|
case IN_TAG_CLINE:
|
|
|
|
break;
|
|
|
|
case IN_TAG_END:
|
|
|
|
break;
|
|
|
|
case IN_TAG_ENDFOOT:
|
|
|
|
break;
|
|
|
|
case IN_TAG_ENDHEAD:
|
|
|
|
break;
|
|
|
|
case IN_TAG_HLINE:
|
|
|
|
break;
|
|
|
|
case IN_TAG_HSPACE:
|
|
|
|
break;
|
|
|
|
case IN_TAG_MULTICOLUMN:
|
|
|
|
break;
|
|
|
|
case IN_TAG_MULTIROW:
|
|
|
|
break;
|
|
|
|
case IN_TAG_TABULARNEWLINE:
|
|
|
|
break;
|
|
|
|
default:
|
|
error->code = (int)i;
|
|
error->message = "unknown error (IN_TAG_))";
|
|
return error->code;
|
|
break;
|
|
}
|
|
|
|
if (c == '{' || c == '[')
|
|
where_stack[++tex_level] = IN_TAGPARM;
|
|
|
|
else if (c == '}' || c == ']') {
|
|
where_stack[tex_level--] = IN_UNDEF;
|
|
i--;
|
|
|
|
} else if (c == '\\') {
|
|
tag = zfree_null(tag);
|
|
where_stack[tex_level] = IN_TAG;
|
|
|
|
} else if (isspace(c))
|
|
where_stack[++tex_level] = IN_SPACE;
|
|
|
|
else if (c == '%')
|
|
where_stack[++tex_level] = IN_COMMENT;
|
|
|
|
else if (isgraph(c) || c < 0) {
|
|
|
|
} else {
|
|
error->code = (int)i;
|
|
error->message = "unexpected symbol (IN_TAG_)";
|
|
return error->code;
|
|
}
|
|
break;
|
|
|
|
case IN_FORMULA:
|
|
|
|
break;
|
|
|
|
default:
|
|
error->code = (int)i;
|
|
error->message = "unknown error";
|
|
return error->code;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|