/*
 * dev/c/tex_parser/tex_parser.c
 *
 * 231 lines
 * 5.5 KiB
 * C
 */

#include "tex_parser.h"
#include <string.h>
#include <ctype.h>
#include "tex_parser_tags.h"
#include "tex_parser_const.h"
#include "zalloc_ext.h"
#include "zalloc.h"
#include "c_const.h"
#include "xerror.h"
#include "zerror.h"
/* only for debug */
#define __TEX_PARSER_DEBUG
#include <stdio.h>
/**
 * Scan a TeX source buffer with a small stack-based state machine.
 *
 * The scanner walks the buffer one byte at a time. The current lexical
 * context (tag name, tag parameter, comment, whitespace run, ...) is kept
 * on where_stack[]; tex_level is the index of the active entry and
 * where_stack[0] always stays IN_UNDEF.
 *
 * @param source      Buffer to scan. A NULL pointer is treated as empty input.
 * @param len         Number of bytes to scan; 0 means "use strlen(source)".
 * @param tex_struct  Not read or written by the code in this function.
 * @param error       Passed to z_set_error() on failure. The codes used are
 *                    TEX_PARSER_ERROR_STACK, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL
 *                    and TEX_PARSER_ERROR_PLACE_UNKNOWN, all in
 *                    TEX_PARSER_DOMAIN.
 *
 * The function returns as soon as the first error has been reported.
 */
void tex_parse(const char *source, size_t len, struct tex_struct_s **tex_struct, struct zerror_s **error)
{
    /* Treat a missing buffer like an empty one instead of crashing in strlen(). */
    if (!source)
        return;
    if (!len)
        len = strlen(source);

    enum where_e
    {
        IN_UNDEF = 0,           // undefined place
        IN_COMMENT,             // any comment
        IN_TAG,                 // any tag
        IN_TAG_UNKNOWN,         // unknown tag
        IN_SPACE,               // space, \tag { for example
        IN_TAGPARM,             // in \tag{} curly braces
        IN_TAG_BEGIN,
        IN_TAG_END,
        IN_TAG_TABULARNEWLINE,
        IN_TABLE,
        IN_TABLE_PARAMS,        // column properties divided by '>'
        IN_TABLE_TAG_HLINE,
        IN_TABLE_TAG_MULTICOLUMN,
        IN_TABLE_COMMENT,
        IN_TABLE_TAG,
        IN_TABLE_TAG_UNKNOWN,
        IN_TABLE_SPACE,
        IN_TABLE_TAGPARM,
        IN_TABLE_TAG_CLINE,
        IN_TABLE_TAG_ENDFOOT,
        IN_TABLE_TAG_ENDHEAD,
        IN_TABLE_TAG_HSPACE,
        IN_TABLE_TAG_MULTIROW,
        IN_FORMULA,             // in $...$
    } where_stack[MAX_TEX_STACK_LEVEL] = {IN_UNDEF};
    char *tag = NULL, *param = NULL;
    size_t i = 0;
    size_t tex_level = 0;

    (void)tex_struct; /* unused in this function */

    for (i = 0; i < len; i++) {
        /* read one character from input stream */
        char c = source[i];
        /*
         * <ctype.h> functions require a value representable as unsigned char
         * (or EOF); passing a negative plain char is undefined behaviour.
         */
        unsigned char uc = (unsigned char)c;
        /*
         * Bytes >= 0x80 (e.g. parts of multi-byte text) are accepted as
         * ordinary content. Testing uc rather than "c < 0" keeps this working
         * on platforms where plain char is unsigned.
         */
        int high_byte = uc > 0x7f;

        /* stack checking: every state may push at most one level */
        if (tex_level + 1 == MAX_TEX_STACK_LEVEL) {
            z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_STACK,
                "tex_parse(): stack overflow, symbol %c(0x%2.2x) at position %lu",
                c, (unsigned)uc, (unsigned long)i);
            goto cleanup;
        }
#ifdef __TEX_PARSER_DEBUG
        printf("tex_level = %lu, c = %c\n", (unsigned long)tex_level, c);
#endif
        /* looking at where_stack and encountered character */
        switch (where_stack[tex_level]) {
        case IN_UNDEF:
            if (c == '\\') {
                zclear(&tag);
                where_stack[++tex_level] = IN_TAG;
            } else if (isspace(uc)) {
                where_stack[++tex_level] = IN_SPACE;
            } else if (c == '%') {
                where_stack[++tex_level] = IN_COMMENT;
            } else if (isgraph(uc) || high_byte) {
                /* plain text: nothing to do */
            } else {
                z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL,
                    "tex_parse(): IN_UNDEF unexpected symbol %c(0x%2.2x) at position %lu",
                    c, (unsigned)uc, (unsigned long)i);
                goto cleanup;
            }
            break;
        case IN_COMMENT:
            /* a comment runs to the end of the line */
            if (c == '\r' || c == '\n') {
                where_stack[tex_level--] = IN_UNDEF;
            }
            break;
        case IN_TAG:
            if (isalnum(uc)) {
                tag = zalloc_append8_str(tag, c);
            } else if (!tag && (c == '\\' || c == ','
                    || c == '%' || c == '_')) {
                /* one-character escape (backslash followed by one of: \ , % _) */
                where_stack[tex_level--] = IN_UNDEF;
            } else if (!tag) {
                z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL,
                    "tex_parse(): IN_TAG empty tag, symbol %c(0x%2.2x) at position %lu",
                    c, (unsigned)uc, (unsigned long)i);
                goto cleanup;
            } else {
                /* the tag name is complete: classify it in place */
                if (!strcmp(tag, TEX_TAG_BEGIN))
                    where_stack[tex_level] = IN_TAG_BEGIN;
                else if (!strcmp(tag, TEX_TAG_END))
                    where_stack[tex_level] = IN_TAG_END;
                else if (!strcmp(tag, TEX_TAG_TABULARNEWLINE))
                    where_stack[tex_level] = IN_TAG_TABULARNEWLINE;
                else
                    where_stack[tex_level] = IN_TAG_UNKNOWN;
#ifdef __TEX_PARSER_DEBUG
                printf("\\%s\n", tag);
#endif
                zclear(&tag);
                /* re-examine this character in the new state (the loop's i++ undoes this) */
                i--;
            }
            break;
        case IN_SPACE:
            if (!isspace(uc)) {
                where_stack[tex_level--] = IN_UNDEF;
                /* re-examine this character in the state below */
                i--;
            }
            break;
        case IN_TAGPARM:
            if (c == '{' || c == '[') { // tag params
                zclear(&param);
                where_stack[++tex_level] = IN_TAGPARM;
            } else if (c == '}' || c == ']') { // end tag params
#ifdef __TEX_PARSER_DEBUG
                if (param)
                    printf("IN_TAGPARM: {%s}\n", param);
#endif
                where_stack[tex_level--] = IN_UNDEF;
                /*
                 * TODO: table handling is not implemented. The earlier draft
                 * intended to switch to IN_TABLE here when the enclosing tag
                 * is IN_TAG_BEGIN and param equals TEX_PARAM_TABLE.
                 */
            } else if (c == '\\') { // new tag
                zclear(&tag);
                where_stack[++tex_level] = IN_TAG;
            } else if (c == '%') {
                where_stack[++tex_level] = IN_COMMENT;
            } else if (isgraph(uc) || isspace(uc) || high_byte) {
                param = zalloc_append8_str(param, c);
            } else {
                z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL,
                    "tex_parse(): IN_TAGPARM unexpected symbol %c(0x%2.2x) at position %lu",
                    c, (unsigned)uc, (unsigned long)i);
                goto cleanup;
            }
            break;
        case IN_TAG_UNKNOWN:
        case IN_TAG_BEGIN:
        case IN_TAG_END:
        case IN_TAG_TABULARNEWLINE:
            if (c == '{' || c == '[') {
                zclear(&param);
                where_stack[++tex_level] = IN_TAGPARM;
            } else if (c == '}' || c == ']') {
#ifdef __TEX_PARSER_DEBUG
                if (param)
                    printf("IN_TAGPARM: {%s}\n", param);
#endif
                /* the bracket belongs to the enclosing state: pop and re-examine it there */
                where_stack[tex_level--] = IN_UNDEF;
                i--;
            } else if (c == '\\') {
                /* a new tag replaces the current one at the same level */
                zclear(&tag);
                where_stack[tex_level] = IN_TAG;
            } else if (isspace(uc)) {
                where_stack[++tex_level] = IN_SPACE;
            } else if (c == '%') {
                where_stack[++tex_level] = IN_COMMENT;
            } else if (isgraph(uc) || high_byte) {
                /* text following the tag: nothing to do */
            } else {
                z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL,
                    "tex_parse(): IN_%d unexpected symbol %c(0x%2.2x) at position %lu",
                    (int)where_stack[tex_level], c, (unsigned)uc, (unsigned long)i);
                goto cleanup;
            }
            break;
        case IN_FORMULA:
            break;
        default:
            /* the IN_TABLE* states have no handler yet */
            z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_PLACE_UNKNOWN,
                "tex_parse(): IN_%d unknown place, symbol %c(0x%2.2x) at position %lu",
                (int)where_stack[tex_level], c, (unsigned)uc, (unsigned long)i);
            goto cleanup;
        }
    }

cleanup:
    /*
     * Drop whatever is still held in the scratch buffers on every exit path.
     * NOTE(review): assumes zclear() releases the buffer and resets the
     * pointer, as its use before each new tag/param suggests - confirm
     * against the zalloc headers.
     */
    zclear(&tag);
    zclear(&param);
}