/*
 * dev/c/tex_parser/tex_parser.c
 *
 * Repository listing: 286 lines, 5.4 KiB, C
 */

#include "tex_parser.h"
#include <string.h>
#include <ctype.h>
#include "tex_parser_tags.h"
#include "zalloc_ext.h"
#include "zalloc.h"
#include "c_const.h"
/*
 * Debug tracing support: tex_parse() guards its printf() trace output with
 * __TEX_PARSER_DEBUG, which is why <stdio.h> is included here.
 * NOTE(review): identifiers containing a double underscore are reserved for
 * the implementation in C; consider renaming this macro.
 */
#define __TEX_PARSER_DEBUG 0
#include <stdio.h>
/**
 * Scan a TeX source buffer with a small stack-based state machine.
 *
 * The function walks the input one byte at a time, tracking the current
 * lexical context (comment, tag name, tag parameter braces, ...) on a
 * fixed-size stack of MAX_TEX_STACK_LEVEL entries.  At present it only
 * validates the input; recognised tags select a dedicated state but no
 * tag-specific action is performed yet.
 *
 * @param source  Input text; must not be NULL.  Bytes >= 0x80 (e.g. UTF-8
 *                continuation bytes) are accepted as ordinary text.
 * @param len     Number of bytes to scan.  When 0, strlen(source) is used,
 *                so source must then be NUL-terminated.
 * @param error   Out-parameter; must not be NULL.  Reset to code 0 and a
 *                NULL message on entry.  On failure, code receives the byte
 *                offset at which the problem was detected and message a
 *                static description.
 *
 * @return 0 on success, otherwise error->code (the failing byte offset).
 *         NOTE: a failure at offset 0 also yields 0, so callers that need to
 *         tell the two apart must check error->message as well.
 */
int tex_parse(const char *source, size_t len, struct zerror_s *error)
{
    enum where_e {
        IN_UNDEF = 0,           // undefined place
        IN_COMMENT,             // any comment
        IN_TAG,                 // any tag
        IN_TAG_UNKNOWN,         // unknown tag
        IN_SPACE,               // space, \tag { for example
        IN_TAGPARM,             // in \tag{} curly braces
        IN_TAG_BEGIN,
        IN_TAG_CLINE,
        IN_TAG_END,
        IN_TAG_ENDFOOT,
        IN_TAG_ENDHEAD,
        IN_TAG_HLINE,
        IN_TAG_HSPACE,
        IN_TAG_MULTICOLUMN,
        IN_TAG_MULTIROW,
        IN_TAG_TABULARNEWLINE,
        IN_FORMULA,             // in $...$
    } where_stack[MAX_TEX_STACK_LEVEL] = {IN_UNDEF};

    /* Tag names that get a dedicated state; anything else is IN_TAG_UNKNOWN. */
    const struct {
        const char *name;
        enum where_e state;
    } known_tags[] = {
        {TEX_TAG_BEGIN,          IN_TAG_BEGIN},
        {TEX_TAG_CLINE,          IN_TAG_CLINE},
        {TEX_TAG_END,            IN_TAG_END},
        {TEX_TAG_ENDFOOT,        IN_TAG_ENDFOOT},
        {TEX_TAG_ENDHEAD,        IN_TAG_ENDHEAD},
        {TEX_TAG_HLINE,          IN_TAG_HLINE},
        {TEX_TAG_HSPACE,         IN_TAG_HSPACE},
        {TEX_TAG_MULTICOLUMN,    IN_TAG_MULTICOLUMN},
        {TEX_TAG_MULTIROW,       IN_TAG_MULTIROW},
        {TEX_TAG_TABULARNEWLINE, IN_TAG_TABULARNEWLINE},
    };

    char *tag = NULL;       /* tag name being accumulated while in IN_TAG */
    size_t i = 0;
    size_t tex_level = 0;   /* index of the current top of where_stack */

    error->code = 0;
    error->message = NULL;
    if (!len)
        len = strlen(source);

    for (i = 0; i < len; i++) {
        /*
         * Stack checking: a single character pushes at most one level, so
         * refusing to continue once the top slot is reached keeps every
         * ++tex_level below inside the array.
         */
        if (tex_level + 1 == MAX_TEX_STACK_LEVEL) {
            error->message = "stack overflow";
            goto fail;
        }

        /* read one character from input stream */
        const char c = source[i];
        /*
         * <ctype.h> functions require a value representable as unsigned
         * char; passing a negative plain char is undefined behaviour.
         */
        const unsigned char uc = (unsigned char)c;

        /* #if, not #ifdef: the macro is defined as 0 when tracing is off. */
#if __TEX_PARSER_DEBUG
        printf("tex_level = %zu, c = %c\n", tex_level, c);
#endif

        /* looking at where_stack and encountered character */
        switch (where_stack[tex_level]) {
        case IN_UNDEF:
            if (c == '\\') {
                tag = zfree_null(tag);
                where_stack[++tex_level] = IN_TAG;
            } else if (isspace(uc)) {
                where_stack[++tex_level] = IN_SPACE;
            } else if (c == '%') {
                where_stack[++tex_level] = IN_COMMENT;
            } else if (!(isgraph(uc) || uc >= 0x80)) {
                /* neither printable nor a high (non-ASCII) byte */
                error->message = "unexpected symbol (IN_UDEF)";
                goto fail;
            }
            break;

        case IN_COMMENT:
            /* a comment runs to the end of the line */
            if (c == '\r' || c == '\n') {
                where_stack[tex_level--] = IN_UNDEF;
            }
            break;

        case IN_TAG:
            if (isalnum(uc)) {
                tag = zalloc_append8_str(tag, c);
            } else if (!tag && (c == '\\' || c == ','
                                || c == '%' || c == '_')) {
                /* single-character escape such as \\ \, \% \_ */
                where_stack[tex_level--] = IN_UNDEF;
            } else if (!tag) {
                error->message = "empty tag (IN_TAG)";
                goto fail;
            } else {
                /* tag name complete: replace IN_TAG with the tag's state */
                enum where_e next = IN_TAG_UNKNOWN;
                for (size_t k = 0;
                     k < sizeof known_tags / sizeof known_tags[0]; k++) {
                    if (strcmp(tag, known_tags[k].name) == 0) {
                        next = known_tags[k].state;
                        break;
                    }
                }
                where_stack[tex_level] = next;
#if __TEX_PARSER_DEBUG
                printf("\\%s\n", tag);
#endif
                tag = zfree_null(tag);
                /* re-process the terminating character in the new state */
                i--;
            }
            break;

        case IN_SPACE:
            if (!isspace(uc)) {
                where_stack[tex_level--] = IN_UNDEF;
                /* re-process this character in the enclosing state */
                i--;
            }
            break;

        case IN_TAGPARM:
            if (c == '{' || c == '[') {             // tag params
                where_stack[++tex_level] = IN_TAGPARM;
            } else if (c == '}' || c == ']') {      // end tag params
                where_stack[tex_level--] = IN_UNDEF;
            } else if (c == '\\') {                 // new tag
                tag = zfree_null(tag);
                where_stack[++tex_level] = IN_TAG;
            } else if (isspace(uc)) {
                where_stack[++tex_level] = IN_SPACE;
            } else if (c == '%') {
                where_stack[++tex_level] = IN_COMMENT;
            } else if (!(isgraph(uc) || uc >= 0x80)) {
                error->message = "unexpected symbol (IN_TAGPARM)";
                goto fail;
            }
            break;

        case IN_TAG_UNKNOWN:
        case IN_TAG_BEGIN:
        case IN_TAG_CLINE:
        case IN_TAG_END:
        case IN_TAG_ENDFOOT:
        case IN_TAG_ENDHEAD:
        case IN_TAG_HLINE:
        case IN_TAG_HSPACE:
        case IN_TAG_MULTICOLUMN:
        case IN_TAG_MULTIROW:
        case IN_TAG_TABULARNEWLINE:
            /* No tag-specific handling yet: all tag states share one path. */
            if (c == '{' || c == '[') {
                where_stack[++tex_level] = IN_TAGPARM;
            } else if (c == '}' || c == ']') {
                /* closes an enclosing group: pop and let it see the brace */
                where_stack[tex_level--] = IN_UNDEF;
                i--;
            } else if (c == '\\') {
                /* a new tag replaces the current one at the same level */
                tag = zfree_null(tag);
                where_stack[tex_level] = IN_TAG;
            } else if (isspace(uc)) {
                where_stack[++tex_level] = IN_SPACE;
            } else if (c == '%') {
                where_stack[++tex_level] = IN_COMMENT;
            } else if (!(isgraph(uc) || uc >= 0x80)) {
                error->message = "unexpected symbol (IN_TAG_)";
                goto fail;
            }
            break;

        case IN_FORMULA:
            break;

        default:
            error->message = "unknown error";
            goto fail;
        }
    }

    /* Input may end in the middle of a tag name; do not leak it. */
    tag = zfree_null(tag);
    return 0;

fail:
    /* Single failure exit: release the partial tag and report the offset. */
    tag = zfree_null(tag);
    error->code = (int)i;
    return error->code;
}