dev/c/tex_parser/tex_parser.c

292 lines
7.1 KiB
C
Raw Normal View History

#include "tex_parser.h"
#include <string.h>
2011-06-10 19:03:50 +04:00
#include <ctype.h>
#include "tex_parser_tags.h"
#include "tex_parser_const.h"
#include "zalloc_ext.h"
#include "zalloc.h"
#include "c_const.h"
#include "xerror.h"
#include "zerror.h"
2011-06-27 15:09:19 +04:00
/* only for debug */
2011-07-04 18:24:14 +04:00
#define __TEX_PARSER_DEBUG
2011-06-28 16:16:35 +04:00
#include <stdio.h>
2011-06-21 12:01:53 +04:00
2011-06-27 15:09:19 +04:00
2011-07-04 18:24:14 +04:00
void tex_parse(const char *source, size_t len, struct tex_struct_s **tex_struct, struct zerror_s **error)
{
2011-06-20 17:24:43 +04:00
if (!len)
len = strlen(source);
2011-06-20 17:24:43 +04:00
2011-06-10 19:03:50 +04:00
enum where_e
{
2011-06-20 17:24:43 +04:00
IN_UNDEF = 0, // undefined place
IN_COMMENT, // any comment
IN_TAG, // any tag
2011-06-27 14:43:41 +04:00
IN_TAG_UNKNOWN, // unknown tag
IN_SPACE, // space, \tag { for example
2011-06-20 20:07:17 +04:00
IN_TAGPARM, // in \tag{} curly braces
IN_TAG_BEGIN,
IN_TAG_END,
IN_TAG_TABULARNEWLINE,
2011-06-20 17:24:43 +04:00
IN_FORMULA, // in $...$
2011-06-20 20:07:17 +04:00
2011-06-20 17:24:43 +04:00
} where_stack[MAX_TEX_STACK_LEVEL] = {IN_UNDEF};
2011-07-15 18:15:48 +04:00
enum object_e {
OBJ_UNDEF = 0,
OBJ_TABLE,
OBJ_TABLE_PARAMS,
OBJ_TABLE_SUBTABLE,
2011-07-15 18:15:48 +04:00
OBJ_TABLE_HEADER,
OBJ_TABLE_FIRSTHEADER,
2011-07-15 18:15:48 +04:00
OBJ_TABLE_FOOTER,
OBJ_TABLE_FIRSTFOOTER,
2011-07-15 18:15:48 +04:00
OBJ_GRAPHICS
} object_stack[MAX_TEX_STACK_LEVEL] = {OBJ_UNDEF};
char *tag = NULL, *param = NULL;
2011-06-27 14:43:41 +04:00
size_t i = 0;
2011-06-20 17:24:43 +04:00
size_t tex_level = 0;
2011-07-15 18:15:48 +04:00
size_t obj_level = 0;
2011-06-20 17:24:43 +04:00
for (i = 0; i < len; i++) {
2011-06-20 20:07:17 +04:00
/* stack checking */
if (tex_level + 1 == MAX_TEX_STACK_LEVEL) {
2011-07-01 19:29:58 +04:00
z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_STACK,
"tex_parse(): stack overflow, symbol %c(0x%2.2x) at position %d",
source[i], (u_int8_t)source[i], i);
return;
2011-06-20 20:07:17 +04:00
}
2011-06-21 12:01:53 +04:00
/* read one character from input stream */
char c = source[i];
#ifdef __TEX_PARSER_DEBUG
printf("tex_level = %lu, c = %c\n", tex_level, c);
#endif
2011-06-28 16:16:35 +04:00
2011-06-21 12:01:53 +04:00
/* looking at where_stack and encountered character */
2011-06-20 17:24:43 +04:00
switch (where_stack[tex_level]) {
case IN_UNDEF:
2011-06-10 19:03:50 +04:00
if (c == '\\') {
zclear(&tag);
where_stack[++tex_level] = IN_TAG;
2011-06-20 20:07:17 +04:00
} else if (isspace(c)) {
where_stack[++tex_level] = IN_SPACE;
2011-06-20 17:24:43 +04:00
} else if (c == '%') {
where_stack[++tex_level] = IN_COMMENT;
2011-06-20 20:07:17 +04:00
} else if (isgraph(c) || c < 0) {
2011-06-20 17:24:43 +04:00
} else {
2011-07-01 19:29:58 +04:00
z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL,
"tex_parse(): IN_UNDEF unexpected symbol %c(0x%2.2x) at position %d",
source[i], (u_int8_t)source[i], i);
return;
2011-06-20 17:24:43 +04:00
}
break;
case IN_COMMENT:
if (c == '\r' || c == '\n') {
where_stack[tex_level--] = IN_UNDEF;
2011-06-10 19:03:50 +04:00
}
break;
case IN_TAG:
2011-06-23 19:57:17 +04:00
if (isalnum(c)) {
2011-06-21 12:01:53 +04:00
tag = zalloc_append8_str(tag, c);
2011-06-23 19:44:22 +04:00
} else if (!tag && (c == '\\' || c == ','
|| c == '%' || c == '_')) {
2011-06-23 19:57:17 +04:00
where_stack[tex_level--] = IN_UNDEF;
2011-06-20 20:07:17 +04:00
2011-06-23 19:44:22 +04:00
} else if (!tag) {
2011-07-01 19:29:58 +04:00
z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL,
"tex_parse(): IN_TAG empty tag, symbol %c(0x%2.2x) at position %d",
source[i], (u_int8_t)source[i], i);
return;
2011-06-10 19:03:50 +04:00
2011-06-27 15:09:19 +04:00
} else {
2011-06-28 16:16:35 +04:00
if (!strcmp(tag, TEX_TAG_BEGIN))
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_BEGIN;
2011-06-28 16:16:35 +04:00
else if (!strcmp(tag, TEX_TAG_END))
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_END;
else if (!strcmp(tag, TEX_TAG_TABULARNEWLINE)) {
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_TABULARNEWLINE;
2011-06-28 16:16:35 +04:00
} else if (!strcmp(tag, TEX_TAG_ENDFIRSTHEAD)) {
// subtable is first header - some actions here...
object_stack[obj_level] = OBJ_TABLE_SUBTABLE;
#ifdef __TEX_PARSER_DEBUG
puts("=OBJ_TABLE_SUBTABLE");
#endif
} else if (!strcmp(tag, TEX_TAG_ENDHEAD)) {
// subtable is header - some actions here...
object_stack[obj_level] = OBJ_TABLE_SUBTABLE;
#ifdef __TEX_PARSER_DEBUG
puts("=OBJ_TABLE_SUBTABLE");
#endif
} else if (!strcmp(tag, TEX_TAG_ENDFIRSTFOOT)) {
// subtable is first footer - some actions here...
object_stack[obj_level] = OBJ_TABLE_SUBTABLE;
#ifdef __TEX_PARSER_DEBUG
puts("=OBJ_TABLE_SUBTABLE");
#endif
} else if (!strcmp(tag, TEX_TAG_ENDFOOT)) {
// subtable is footer - some actions here...
object_stack[obj_level] = OBJ_TABLE_SUBTABLE;
#ifdef __TEX_PARSER_DEBUG
puts("=OBJ_TABLE_SUBTABLE");
#endif
} else
2011-06-27 15:09:19 +04:00
where_stack[tex_level] = IN_TAG_UNKNOWN;
#ifdef __TEX_PARSER_DEBUG
2011-06-28 16:16:35 +04:00
printf("\\%s\n", tag);
#endif
zclear(&tag);
2011-06-27 15:09:19 +04:00
2011-06-28 16:16:35 +04:00
i--;
2011-06-27 14:43:41 +04:00
}
break;
2011-06-23 19:57:17 +04:00
2011-06-27 14:43:41 +04:00
case IN_SPACE:
2011-06-28 16:16:35 +04:00
if (!isspace(c)) {
2011-06-27 14:43:41 +04:00
where_stack[tex_level--] = IN_UNDEF;
2011-06-27 15:54:00 +04:00
i--;
2011-06-20 17:24:43 +04:00
}
break;
case IN_TAGPARM:
2011-06-23 19:44:22 +04:00
if (c == '{' || c == '[') { // tag params
2011-07-04 11:31:35 +04:00
zclear(&param);
2011-06-21 19:13:25 +04:00
where_stack[++tex_level] = IN_TAGPARM;
2011-06-23 19:44:22 +04:00
} else if (c == '}' || c == ']') { // end tag params
2011-07-04 18:24:14 +04:00
#ifdef __TEX_PARSER_DEBUG
if (param)
printf("IN_TAGPARM: {%s}\n", param);
#endif
2011-07-12 19:13:22 +04:00
if (tex_level && where_stack[tex_level - 1] == IN_TAG_BEGIN
&& !strcmp(param, TEX_PARAM_TABLE)) {
2011-07-15 18:15:48 +04:00
object_stack[++obj_level] = OBJ_TABLE;
#ifdef __TEX_PARSER_DEBUG
puts("->OBJ_TABLE");
#endif
} else if (tex_level && where_stack[tex_level - 1] == IN_TAG_END
&& !strcmp(param, TEX_PARAM_TABLE)) {
2011-07-15 18:15:48 +04:00
object_stack[obj_level--] = OBJ_UNDEF;
#ifdef __TEX_PARSER_DEBUG
puts("<-OBJ_UNDEF");
#endif
}
2011-07-12 19:13:22 +04:00
where_stack[tex_level--] = IN_UNDEF;
} else if (c == '\\') { // new tag
zclear(&tag);
where_stack[++tex_level] = IN_TAG;
2011-06-27 14:43:41 +04:00
} else if (c == '%') {
where_stack[++tex_level] = IN_COMMENT;
2011-07-04 18:24:14 +04:00
} else if (isgraph(c) || isspace(c) || c < 0) {
param = zalloc_append8_str(param, c);
2011-06-21 19:13:25 +04:00
} else {
2011-07-01 19:29:58 +04:00
z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL,
"tex_parse(): IN_TAGPARM unexpected symbol %c(0x%2.2x) at position %d",
source[i], (u_int8_t)source[i], i);
return;
2011-06-27 14:43:41 +04:00
}
break;
case IN_TAG_UNKNOWN:
case IN_TAG_BEGIN:
2011-06-28 16:16:35 +04:00
case IN_TAG_END:
case IN_TAG_TABULARNEWLINE:
2011-07-04 11:31:35 +04:00
if (c == '{' || c == '[') {
switch (object_stack[obj_level]) {
case OBJ_TABLE:
object_stack[++obj_level] = OBJ_TABLE_PARAMS;
#ifdef __TEX_PARSER_DEBUG
puts("->OBJ_TABLE_PARAMS");
#endif
break;
default:
break;
}
2011-07-04 11:31:35 +04:00
zclear(&param);
2011-06-27 17:07:52 +04:00
where_stack[++tex_level] = IN_TAGPARM;
2011-07-04 11:31:35 +04:00
} else if (c == '}' || c == ']') {
2011-07-04 18:24:14 +04:00
#ifdef __TEX_PARSER_DEBUG
if (param)
printf("IN_TAGPARM: {%s}\n", param);
#endif
2011-06-27 17:07:52 +04:00
where_stack[tex_level--] = IN_UNDEF;
2011-06-28 11:11:23 +04:00
i--;
2011-06-27 17:07:52 +04:00
2011-06-28 16:16:35 +04:00
} else if (c == '\\') {
switch (object_stack[obj_level]) {
case OBJ_TABLE:
object_stack[++obj_level] = OBJ_TABLE_SUBTABLE;
#ifdef __TEX_PARSER_DEBUG
puts("->OBJ_TABLE_SUBTABLE");
#endif
break;
default:
break;
}
zclear(&tag);
where_stack[tex_level] = IN_TAG;
2011-07-04 18:24:14 +04:00
} else if (isspace(c)) {
where_stack[++tex_level] = IN_SPACE;
2011-07-04 18:24:14 +04:00
} else if (c == '%')
2011-06-28 16:16:35 +04:00
where_stack[++tex_level] = IN_COMMENT;
2011-06-28 16:16:35 +04:00
else if (isgraph(c) || c < 0) {
} else {
2011-07-01 19:29:58 +04:00
z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_UNEXPECTED_SYMBOL,
"tex_parse(): IN_%d unexpected symbol %c(0x%2.2x) at position %d",
where_stack[tex_level], source[i], (u_int8_t)source[i], i);
return;
}
break;
case IN_FORMULA:
break;
2011-06-20 17:24:43 +04:00
default:
2011-07-01 19:29:58 +04:00
z_set_error(error, TEX_PARSER_DOMAIN, TEX_PARSER_ERROR_PLACE_UNKNOWN,
"tex_parse(): IN_%d unknown place, symbol %c(0x%2.2x) at position %d",
where_stack[tex_level], source[i], (u_int8_t)source[i], i);
return;
2011-06-20 17:24:43 +04:00
break;
2011-06-10 19:03:50 +04:00
}
2011-06-20 17:24:43 +04:00
}
}
2011-07-15 18:15:48 +04:00