dev/c/tex_parser/tex_parser.c

286 lines
5.4 KiB
C
Raw Normal View History

#include "tex_parser.h"
#include <string.h>
2011-06-10 19:03:50 +04:00
#include <ctype.h>
#include "tex_parser_tags.h"
#include "zalloc_ext.h"
#include "zalloc.h"
#include "c_const.h"
2011-06-27 15:09:19 +04:00
/* Debug tracing switch for tex_parse(); only for debug. */
#define __TEX_PARSER_DEBUG 0
2011-06-28 16:16:35 +04:00
#include <stdio.h>
2011-06-21 12:01:53 +04:00
2011-06-27 15:09:19 +04:00
int tex_parse(const char *source, size_t len, struct zerror_s *error)
{
2011-06-27 16:08:04 +04:00
error->code = 0;
error->message = NULL;
2011-06-20 17:24:43 +04:00
if (!len)
len = strlen(source);
2011-06-20 17:24:43 +04:00
2011-06-10 19:03:50 +04:00
enum where_e
{
2011-06-20 17:24:43 +04:00
IN_UNDEF = 0, // undefined place
IN_COMMENT, // any comment
IN_TAG, // any tag
2011-06-27 14:43:41 +04:00
IN_TAG_UNKNOWN, // unknown tag
IN_SPACE, // space, \tag { for example
2011-06-20 20:07:17 +04:00
IN_TAGPARM, // in \tag{} curly braces
IN_TAG_BEGIN,
IN_TAG_CLINE,
IN_TAG_END,
IN_TAG_ENDFOOT,
IN_TAG_ENDHEAD,
IN_TAG_HLINE,
IN_TAG_HSPACE,
IN_TAG_MULTICOLUMN,
IN_TAG_MULTIROW,
IN_TAG_TABULARNEWLINE,
2011-06-20 17:24:43 +04:00
IN_FORMULA, // in $...$
2011-06-20 20:07:17 +04:00
2011-06-20 17:24:43 +04:00
} where_stack[MAX_TEX_STACK_LEVEL] = {IN_UNDEF};
2011-06-27 14:43:41 +04:00
char *tag = NULL;
size_t i = 0;
2011-06-20 17:24:43 +04:00
size_t tex_level = 0;
for (i = 0; i < len; i++) {
2011-06-20 20:07:17 +04:00
/* stack checking */
if (tex_level + 1 == MAX_TEX_STACK_LEVEL) {
2011-06-27 14:43:41 +04:00
error->code = (int)i;
error->message = "stack overflow";
return error->code;
2011-06-20 20:07:17 +04:00
}
2011-06-21 12:01:53 +04:00
/* read one character from input stream */
char c = source[i];
#ifdef __TEX_PARSER_DEBUG
printf("tex_level = %lu, c = %c\n", tex_level, c);
#endif
2011-06-28 16:16:35 +04:00
2011-06-21 12:01:53 +04:00
/* looking at where_stack and encountered character */
2011-06-20 17:24:43 +04:00
switch (where_stack[tex_level]) {
case IN_UNDEF:
2011-06-10 19:03:50 +04:00
if (c == '\\') {
2011-06-20 17:25:44 +04:00
tag = zfree_null(tag);
where_stack[++tex_level] = IN_TAG;
2011-06-20 20:07:17 +04:00
} else if (isspace(c)) {
where_stack[++tex_level] = IN_SPACE;
2011-06-20 17:24:43 +04:00
} else if (c == '%') {
where_stack[++tex_level] = IN_COMMENT;
2011-06-20 20:07:17 +04:00
} else if (isgraph(c) || c < 0) {
2011-06-20 17:24:43 +04:00
} else {
2011-06-27 14:43:41 +04:00
error->code = (int)i;
error->message = "unexpected symbol (IN_UDEF)";
return error->code;
2011-06-20 17:24:43 +04:00
}
break;
case IN_COMMENT:
if (c == '\r' || c == '\n') {
where_stack[tex_level--] = IN_UNDEF;
2011-06-10 19:03:50 +04:00
}
break;
case IN_TAG:
2011-06-23 19:57:17 +04:00
if (isalnum(c)) {
2011-06-21 12:01:53 +04:00
tag = zalloc_append8_str(tag, c);
2011-06-23 19:44:22 +04:00
} else if (!tag && (c == '\\' || c == ','
|| c == '%' || c == '_')) {
2011-06-23 19:57:17 +04:00
where_stack[tex_level--] = IN_UNDEF;
2011-06-20 20:07:17 +04:00
2011-06-23 19:44:22 +04:00
} else if (!tag) {
2011-06-27 14:43:41 +04:00
error->code = (int)i;
error->message = "empty tag (IN_TAG)";
2011-06-23 19:44:22 +04:00
return error->code;
2011-06-10 19:03:50 +04:00
2011-06-27 15:09:19 +04:00
} else {
2011-06-28 16:16:35 +04:00
if (!strcmp(tag, TEX_TAG_BEGIN))
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_BEGIN;
2011-06-28 16:16:35 +04:00
else if (!strcmp(tag, TEX_TAG_CLINE))
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_CLINE;
2011-06-28 16:16:35 +04:00
else if (!strcmp(tag, TEX_TAG_END))
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_END;
2011-06-28 16:16:35 +04:00
else if (!strcmp(tag, TEX_TAG_ENDFOOT))
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_ENDFOOT;
2011-06-28 16:16:35 +04:00
else if (!strcmp(tag, TEX_TAG_ENDHEAD))
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_ENDHEAD;
2011-06-28 16:16:35 +04:00
else if (!strcmp(tag, TEX_TAG_HLINE))
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_HLINE;
2011-06-28 16:16:35 +04:00
else if (!strcmp(tag, TEX_TAG_HSPACE))
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_HSPACE;
2011-06-28 16:16:35 +04:00
else if (!strcmp(tag, TEX_TAG_MULTICOLUMN))
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_MULTICOLUMN;
2011-06-28 16:16:35 +04:00
else if (!strcmp(tag, TEX_TAG_MULTIROW))
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_MULTIROW;
2011-06-28 16:16:35 +04:00
else if (!strcmp(tag, TEX_TAG_TABULARNEWLINE))
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_TABULARNEWLINE;
2011-06-28 16:16:35 +04:00
else
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_UNKNOWN;
#ifdef __TEX_PARSER_DEBUG
2011-06-28 16:16:35 +04:00
printf("\\%s\n", tag);
#endif
tag = zfree_null(tag);
2011-06-27 15:09:19 +04:00
2011-06-28 16:16:35 +04:00
i--;
2011-06-27 14:43:41 +04:00
}
break;
2011-06-23 19:57:17 +04:00
2011-06-27 14:43:41 +04:00
case IN_SPACE:
2011-06-28 16:16:35 +04:00
if (!isspace(c)) {
2011-06-27 14:43:41 +04:00
where_stack[tex_level--] = IN_UNDEF;
2011-06-27 15:54:00 +04:00
i--;
2011-06-20 17:24:43 +04:00
}
break;
case IN_TAGPARM:
2011-06-23 19:44:22 +04:00
if (c == '{' || c == '[') { // tag params
2011-06-21 19:13:25 +04:00
where_stack[++tex_level] = IN_TAGPARM;
2011-06-23 19:44:22 +04:00
} else if (c == '}' || c == ']') { // end tag params
where_stack[tex_level--] = IN_UNDEF;
} else if (c == '\\') { // new tag
2011-06-23 19:44:22 +04:00
tag = zfree_null(tag);
where_stack[++tex_level] = IN_TAG;
2011-06-27 14:43:41 +04:00
} else if (isspace(c)) {
where_stack[++tex_level] = IN_SPACE;
} else if (c == '%') {
where_stack[++tex_level] = IN_COMMENT;
} else if (isgraph(c) || c < 0) {
2011-06-21 19:13:25 +04:00
} else {
2011-06-27 14:43:41 +04:00
error->code = (int)i;
error->message = "unexpected symbol (IN_TAGPARM)";
return error->code;
}
break;
case IN_TAG_UNKNOWN:
case IN_TAG_BEGIN:
case IN_TAG_CLINE:
2011-06-28 16:16:35 +04:00
case IN_TAG_END:
case IN_TAG_ENDFOOT:
case IN_TAG_ENDHEAD:
case IN_TAG_HLINE:
case IN_TAG_HSPACE:
case IN_TAG_MULTICOLUMN:
case IN_TAG_MULTIROW:
case IN_TAG_TABULARNEWLINE:
switch(where_stack[tex_level]) {
2011-06-28 16:16:35 +04:00
case IN_TAG_UNKNOWN:
2011-06-28 16:16:35 +04:00
break;
2011-06-28 16:16:35 +04:00
case IN_TAG_BEGIN:
2011-06-28 16:16:35 +04:00
break;
2011-06-28 16:16:35 +04:00
case IN_TAG_CLINE:
2011-06-28 16:16:35 +04:00
break;
2011-06-28 16:16:35 +04:00
case IN_TAG_END:
2011-06-28 16:16:35 +04:00
break;
2011-06-28 16:16:35 +04:00
case IN_TAG_ENDFOOT:
2011-06-28 16:16:35 +04:00
break;
2011-06-28 16:16:35 +04:00
case IN_TAG_ENDHEAD:
2011-06-28 16:16:35 +04:00
break;
2011-06-28 16:16:35 +04:00
case IN_TAG_HLINE:
2011-06-28 16:16:35 +04:00
break;
2011-06-27 17:07:52 +04:00
2011-06-28 16:16:35 +04:00
case IN_TAG_HSPACE:
2011-06-27 17:07:52 +04:00
2011-06-28 16:16:35 +04:00
break;
2011-06-27 17:07:52 +04:00
2011-06-28 16:16:35 +04:00
case IN_TAG_MULTICOLUMN:
2011-06-27 17:07:52 +04:00
2011-06-28 16:16:35 +04:00
break;
2011-06-27 17:07:52 +04:00
2011-06-28 16:16:35 +04:00
case IN_TAG_MULTIROW:
2011-06-27 17:07:52 +04:00
2011-06-28 16:16:35 +04:00
break;
2011-06-28 16:16:35 +04:00
case IN_TAG_TABULARNEWLINE:
2011-06-28 16:16:35 +04:00
break;
2011-06-28 16:16:35 +04:00
default:
error->code = (int)i;
error->message = "unknown error (IN_TAG_))";
return error->code;
break;
}
2011-06-28 16:16:35 +04:00
if (c == '{' || c == '[')
2011-06-27 17:07:52 +04:00
where_stack[++tex_level] = IN_TAGPARM;
2011-06-28 16:16:35 +04:00
else if (c == '}' || c == ']') {
2011-06-27 17:07:52 +04:00
where_stack[tex_level--] = IN_UNDEF;
2011-06-28 11:11:23 +04:00
i--;
2011-06-27 17:07:52 +04:00
2011-06-28 16:16:35 +04:00
} else if (c == '\\') {
tag = zfree_null(tag);
where_stack[tex_level] = IN_TAG;
2011-06-28 16:16:35 +04:00
} else if (isspace(c))
where_stack[++tex_level] = IN_SPACE;
2011-06-28 16:16:35 +04:00
else if (c == '%')
where_stack[++tex_level] = IN_COMMENT;
2011-06-28 16:16:35 +04:00
else if (isgraph(c) || c < 0) {
} else {
error->code = (int)i;
2011-06-28 16:16:35 +04:00
error->message = "unexpected symbol (IN_TAG_)";
return error->code;
}
break;
case IN_FORMULA:
break;
2011-06-20 17:24:43 +04:00
default:
2011-06-27 14:43:41 +04:00
error->code = (int)i;
error->message = "unknown error";
return error->code;
2011-06-20 17:24:43 +04:00
break;
2011-06-10 19:03:50 +04:00
}
2011-06-20 17:24:43 +04:00
}
return 0;
}