#include "tex_parser.h"

#include <string.h>
#include <ctype.h>

#include "tex_parser_tags.h"
#include "tex_parser_const.h"
#include "zalloc_ext.h"
#include "zalloc.h"
#include "c_const.h"
#include "xerror.h"
#include "zerror.h"

/* only for debug: set to 1 to trace the state machine on stdout */
#define __TEX_PARSER_DEBUG 0
#include <stdio.h>

/*
 * Returns non-zero for bytes the parser accepts as ordinary text:
 * printable non-space characters, plus every byte >= 0x80 (bytes of
 * multi-byte / 8-bit encoded text).  The explicit unsigned comparison
 * keeps the result independent of whether plain char is signed.
 */
static int tex_is_text_byte(char c)
{
    const unsigned char uc = (unsigned char)c;

    return isgraph(uc) || uc >= 0x80;
}

/*
 * Scans a TeX fragment with a small stack-based state machine.
 *
 * source - input text; must not be NULL.
 * len    - number of bytes to scan; 0 means "use strlen(source)".
 * error  - receives the error raised through z_set_error(); *error is
 *          reset to NULL on entry when error itself is non-NULL.
 *          NOTE(review): whether z_set_error() tolerates a NULL error
 *          pointer is not visible from this file - confirm.
 *
 * Scanning stops at the first problem.  Errors raised here:
 *   TEX_PARSER_ERROR_STACK             - nesting reached MAX_TEX_STACK_LEVEL
 *   TEX_PARSER_ERROR_UNEXPECTED_SYMBOL - byte not allowed in the current state
 *   TEX_PARSER_ERROR_PLACE_UNKNOWN     - state value with no handler
 *
 * The function currently only validates/tokenizes: recognised tags and
 * collected parameters are not returned to the caller.
 */
void tex_parse(const char *source, size_t len, struct zerror_s **error)
{
    if (error) {
        *error = NULL;
    }
    if (!len) {
        len = strlen(source);
    }

    enum where_e {
        IN_UNDEF = 0,   // undefined place
        IN_COMMENT,     // any comment
        IN_TAG,         // any tag
        IN_TAG_UNKNOWN, // unknown tag
        IN_SPACE,       // space, \tag { for example
        IN_TAGPARM,     // in \tag{} curly braces
        IN_TAG_BEGIN,
        IN_TAG_CLINE,
        IN_TAG_END,
        IN_TAG_ENDFOOT,
        IN_TAG_ENDHEAD,
        IN_TAG_HLINE,
        IN_TAG_HSPACE,
        IN_TAG_MULTICOLUMN,
        IN_TAG_MULTIROW,
        IN_TAG_TABULARNEWLINE,
        IN_FORMULA,     // in $...$
    } where_stack[MAX_TEX_STACK_LEVEL] = {IN_UNDEF};

    /*
     * Accumulators for the tag name and the tag parameter.  zclear() is
     * relied on to reset the pointer to NULL (the empty-tag test in
     * IN_TAG depends on it) and is called on NULL pointers below, so it
     * must accept them; presumably it also frees the buffer - confirm.
     */
    char *tag = NULL, *param = NULL;
    size_t tex_level = 0;

    for (size_t i = 0; i < len; i++) {
        /* read one character from input stream */
        const char c = source[i];
        const unsigned char uc = (unsigned char)c;
        /* values for the "%2.2x" and "%d" conversions of the messages */
        const unsigned code = uc;
        const int pos = (int)i;

        /* stack checking: every state pushes at most one level per
         * character, so one free slot is enough */
        if (tex_level + 1 == MAX_TEX_STACK_LEVEL) {
            z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_STACK,
                "tex_parse(): stack overflow, symbol %c(0x%2.2x) at position %d",
                c, code, pos);
            goto done;
        }

#if __TEX_PARSER_DEBUG
        printf("tex_level = %zu, c = %c\n", tex_level, c);
#endif

        /* looking at where_stack and encountered character */
        switch (where_stack[tex_level]) {
        case IN_UNDEF:
            if (c == '\\') {
                zclear(&tag);
                where_stack[++tex_level] = IN_TAG;
            } else if (isspace(uc)) {
                where_stack[++tex_level] = IN_SPACE;
            } else if (c == '%') {
                where_stack[++tex_level] = IN_COMMENT;
            } else if (tex_is_text_byte(c)) {
                /* plain text: nothing to do */
            } else {
                z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL,
                    "tex_parse(): IN_UNDEF unexpected symbol %c(0x%2.2x) at position %d",
                    c, code, pos);
                goto done;
            }
            break;

        case IN_COMMENT:
            /* a comment runs to the end of the line */
            if (c == '\r' || c == '\n') {
                where_stack[tex_level--] = IN_UNDEF;
            }
            break;

        case IN_TAG:
            if (isalnum(uc)) {
                tag = zalloc_append8_str(tag, c);
            } else if (!tag && (c == '\\' || c == ',' || c == '%' || c == '_')) {
                /* one-character escapes: \\ \, \% \_ */
                where_stack[tex_level--] = IN_UNDEF;
            } else if (!tag) {
                z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL,
                    "tex_parse(): IN_TAG empty tag, symbol %c(0x%2.2x) at position %d",
                    c, code, pos);
                goto done;
            } else {
                /* tag name complete: switch this level to the tag's state */
                if (!strcmp(tag, TEX_TAG_BEGIN)) {
                    where_stack[tex_level] = IN_TAG_BEGIN;
                } else if (!strcmp(tag, TEX_TAG_CLINE)) {
                    where_stack[tex_level] = IN_TAG_CLINE;
                } else if (!strcmp(tag, TEX_TAG_END)) {
                    where_stack[tex_level] = IN_TAG_END;
                } else if (!strcmp(tag, TEX_TAG_ENDFOOT)) {
                    where_stack[tex_level] = IN_TAG_ENDFOOT;
                } else if (!strcmp(tag, TEX_TAG_ENDHEAD)) {
                    where_stack[tex_level] = IN_TAG_ENDHEAD;
                } else if (!strcmp(tag, TEX_TAG_HLINE)) {
                    where_stack[tex_level] = IN_TAG_HLINE;
                } else if (!strcmp(tag, TEX_TAG_HSPACE)) {
                    where_stack[tex_level] = IN_TAG_HSPACE;
                } else if (!strcmp(tag, TEX_TAG_MULTICOLUMN)) {
                    where_stack[tex_level] = IN_TAG_MULTICOLUMN;
                } else if (!strcmp(tag, TEX_TAG_MULTIROW)) {
                    where_stack[tex_level] = IN_TAG_MULTIROW;
                } else if (!strcmp(tag, TEX_TAG_TABULARNEWLINE)) {
                    where_stack[tex_level] = IN_TAG_TABULARNEWLINE;
                } else {
                    where_stack[tex_level] = IN_TAG_UNKNOWN;
                }
#if __TEX_PARSER_DEBUG
                printf("\\%s\n", tag);
#endif
                zclear(&tag);
                i--; /* re-read the terminating character in the new state */
            }
            break;

        case IN_SPACE:
            if (!isspace(uc)) {
                where_stack[tex_level--] = IN_UNDEF;
                i--; /* re-read the first non-space character */
            }
            break;

        case IN_TAGPARM:
            if (c == '{' || c == '[') { // tag params
                zclear(&param);
                where_stack[++tex_level] = IN_TAGPARM;
            } else if (c == '}' || c == ']') { // end tag params
                where_stack[tex_level--] = IN_UNDEF;
            } else if (c == '\\') { // new tag
                zclear(&tag);
                where_stack[++tex_level] = IN_TAG;
            } else if (isspace(uc)) {
                where_stack[++tex_level] = IN_SPACE;
            } else if (c == '%') {
                where_stack[++tex_level] = IN_COMMENT;
            } else if (tex_is_text_byte(c)) {
                /* plain parameter text */
            } else {
                z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL,
                    "tex_parse(): IN_TAGPARM unexpected symbol %c(0x%2.2x) at position %d",
                    c, code, pos);
                goto done;
            }
            /* keep the returned pointer, exactly as done for tag above;
             * dropping it loses every appended character */
            param = zalloc_append8_str(param, c);
            break;

        case IN_TAG_UNKNOWN:
        case IN_TAG_BEGIN:
        case IN_TAG_CLINE:
        case IN_TAG_END:
        case IN_TAG_ENDFOOT:
        case IN_TAG_ENDHEAD:
        case IN_TAG_HLINE:
        case IN_TAG_HSPACE:
        case IN_TAG_MULTICOLUMN:
        case IN_TAG_MULTIROW:
        case IN_TAG_TABULARNEWLINE:
            /* all recognised tags currently share the same handling */
            if (c == '{' || c == '[') {
                zclear(&param);
                where_stack[++tex_level] = IN_TAGPARM;
            } else if (c == '}' || c == ']') {
                /* closer belongs to the enclosing level: pop and re-read */
                where_stack[tex_level--] = IN_UNDEF;
                i--;
            } else if (c == '\\') {
                /* next tag replaces this one at the same level */
                zclear(&tag);
                where_stack[tex_level] = IN_TAG;
            } else if (isspace(uc)) {
                where_stack[++tex_level] = IN_SPACE;
            } else if (c == '%') {
                where_stack[++tex_level] = IN_COMMENT;
            } else if (tex_is_text_byte(c)) {
                /* plain text following the tag */
            } else {
                z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL,
                    "tex_parse(): IN_%d unexpected symbol %c(0x%2.2x) at position %d",
                    (int)where_stack[tex_level], c, code, pos);
                goto done;
            }
            break;

        case IN_FORMULA:
            /* no transition enters this state yet */
            break;

        default:
            z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_PLACE_UNKNOWN,
                "tex_parse(): IN_%d unknown place, symbol %c(0x%2.2x) at position %d",
                (int)where_stack[tex_level], c, code, pos);
            goto done;
        }
    }

done:
    /* single exit: release whatever the accumulators still hold */
    zclear(&tag);
    zclear(&param);
}