192 lines
4.8 KiB
C
192 lines
4.8 KiB
C
/* a GScanner will tokenize your text, that is, it'll return an integer
|
|
for every word or number that appears in its input stream, following
|
|
certain (customizable) rules to perform this translation.
|
|
you still need to write the parsing functions on your own though.
|
|
here's a little test program that will parse
|
|
|
|
<SYMBOL> = <OPTIONAL-MINUS> <NUMBER> ;
|
|
|
|
constructs, while skipping "#\n" and "/.**./" style comments.
|
|
*/
|
|
|
|
#include <glib.h>
|
|
|
|
/* some test text to be fed into the scanner */
|
|
static const gchar *test_text =
|
|
( "ping = 5;\n"
|
|
"/* slide in some \n"
|
|
" * comments, just for the\n"
|
|
" * fun of it \n"
|
|
" */\n"
|
|
"pong = -6; \n"
|
|
"\n"
|
|
"# the next value is a float\n"
|
|
"zonk = 0.7;\n"
|
|
"# redefine ping\n"
|
|
"ping = - 0.5;\n" );
|
|
|
|
/* define enumeration values to be returned for specific symbols */
|
|
enum {
|
|
SYMBOL_PING = G_TOKEN_LAST + 1,
|
|
SYMBOL_PONG = G_TOKEN_LAST + 2,
|
|
SYMBOL_ZONK = G_TOKEN_LAST + 3
|
|
};
|
|
|
|
/* symbol array */
|
|
static const struct {
|
|
gchar *symbol_name;
|
|
guint symbol_token;
|
|
} symbols[] = {
|
|
{ "ping", SYMBOL_PING, },
|
|
{ "pong", SYMBOL_PONG, },
|
|
{ "zonk", SYMBOL_ZONK, },
|
|
{ NULL, 0, },
|
|
}, *symbol_p = symbols;
|
|
|
|
static gfloat ping = 0;
|
|
static gfloat pong = 0;
|
|
static gfloat zonk = 0;
|
|
|
|
static guint
|
|
parse_symbol (GScanner *scanner)
|
|
{
|
|
guint symbol;
|
|
gboolean negate = FALSE;
|
|
|
|
/* expect a valid symbol */
|
|
g_scanner_get_next_token (scanner);
|
|
symbol = scanner->token;
|
|
if (symbol < SYMBOL_PING ||
|
|
symbol > SYMBOL_ZONK)
|
|
return G_TOKEN_SYMBOL;
|
|
|
|
/* expect '=' */
|
|
g_scanner_get_next_token (scanner);
|
|
if (scanner->token != '=')
|
|
return '=';
|
|
|
|
/* feature optional '-' */
|
|
g_scanner_peek_next_token (scanner);
|
|
if (scanner->next_token == '-')
|
|
{
|
|
g_scanner_get_next_token (scanner);
|
|
negate = !negate;
|
|
}
|
|
|
|
/* expect a float (ints are converted to floats on the fly) */
|
|
g_scanner_get_next_token (scanner);
|
|
if (scanner->token != G_TOKEN_FLOAT)
|
|
return G_TOKEN_FLOAT;
|
|
|
|
/* make sure the next token is a ';' */
|
|
if (g_scanner_peek_next_token (scanner) != ';')
|
|
{
|
|
/* not so, eat up the non-semicolon and error out */
|
|
g_scanner_get_next_token (scanner);
|
|
return ';';
|
|
}
|
|
|
|
/* assign value, eat the semicolon and exit successfully */
|
|
switch (symbol)
|
|
{
|
|
case SYMBOL_PING:
|
|
ping = negate ? - scanner->value.v_float : scanner->value.v_float;
|
|
break;
|
|
case SYMBOL_PONG:
|
|
pong = negate ? - scanner->value.v_float : scanner->value.v_float;
|
|
break;
|
|
case SYMBOL_ZONK:
|
|
zonk = negate ? - scanner->value.v_float : scanner->value.v_float;
|
|
break;
|
|
}
|
|
g_scanner_get_next_token (scanner);
|
|
|
|
return G_TOKEN_NONE;
|
|
}
|
|
|
|
int
|
|
main (int argc, char *argv[])
|
|
{
|
|
GScanner *scanner;
|
|
guint expected_token;
|
|
|
|
scanner = g_scanner_new (NULL);
|
|
|
|
/* adjust lexing behaviour to suit our needs
|
|
*/
|
|
/* convert non-floats (octal values, hex values...) to G_TOKEN_INT */
|
|
scanner->config->numbers_2_int = TRUE;
|
|
/* convert G_TOKEN_INT to G_TOKEN_FLOAT */
|
|
scanner->config->int_2_float = TRUE;
|
|
/* don't return G_TOKEN_SYMBOL, but the symbol's value */
|
|
scanner->config->symbol_2_token = TRUE;
|
|
|
|
scanner->config->cpair_comment_single = "%\n";
|
|
|
|
/* load symbols into the scanner */
|
|
while (symbol_p->symbol_name)
|
|
{
|
|
g_scanner_add_symbol (scanner,
|
|
symbol_p->symbol_name,
|
|
GINT_TO_POINTER (symbol_p->symbol_token));
|
|
symbol_p++;
|
|
}
|
|
|
|
/* feed in the text */
|
|
g_scanner_input_text (scanner, test_text, strlen (test_text));
|
|
|
|
/* give the error handler an idea on how the input is named */
|
|
scanner->input_name = "test text";
|
|
|
|
/* scanning loop, we parse the input untill it's end is reached,
|
|
* the scanner encountered a lexing error, or our sub routine came
|
|
* across invalid syntax
|
|
*/
|
|
do
|
|
{
|
|
expected_token = parse_symbol (scanner);
|
|
|
|
g_scanner_peek_next_token (scanner);
|
|
}
|
|
while (expected_token == G_TOKEN_NONE &&
|
|
scanner->next_token != G_TOKEN_EOF &&
|
|
scanner->next_token != G_TOKEN_ERROR);
|
|
|
|
/* give an error message upon syntax errors */
|
|
if (expected_token != G_TOKEN_NONE)
|
|
g_scanner_unexp_token (scanner, expected_token, NULL, "symbol", NULL, NULL, TRUE);
|
|
|
|
/* finsish parsing */
|
|
g_scanner_destroy (scanner);
|
|
|
|
/* print results */
|
|
g_print ("ping: %f\n", ping);
|
|
g_print ("pong: %f\n", pong);
|
|
g_print ("zonk: %f\n", zonk);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
this gives:
|
|
$ gcc -Wall `glib-config --cflags --libs` gscanner-ex.c && ./a.out
|
|
ping: -0.500000
|
|
pong: -6.000000
|
|
zonk: 0.700000
|
|
$
|
|
|
|
if you change line 6 in the input text to "pong = +6; \n", you get:
|
|
|
|
$ gcc -Wall `glib-config --cflags --libs` gscanner-ex.c && ./a.out
|
|
test text:6: error: unexpected character `+', expected number (float)
|
|
ping: 5.000000
|
|
pong: 0.000000
|
|
zonk: 0.000000
|
|
$
|
|
|
|
since '+'s are not featured by parse_symbol(). since parsing is aborted,
|
|
the lines that would assign values to pong and zonk are not evaluated and
|
|
thus their values remain 0.
|
|
*/
|
|
|