ENH: Using lex-based tokenizer and a simple recursive-descent parser in place of the old hand-coded parser for CMake listfiles.

This commit is contained in:
Brad King 2003-12-08 13:36:59 -05:00
parent 380ee8ca36
commit 00ae7ea261
6 changed files with 2722 additions and 233 deletions

View File

@ -15,6 +15,7 @@ cmCustomCommand.cxx
cmCacheManager.cxx
cmSourceGroup.cxx
cmListFileCache.cxx
cmListFileLexer.c
cmGlob.cxx
cmGlobalGenerator.cxx
cmGlobalUnixMakefileGenerator.cxx

View File

@ -15,10 +15,16 @@
=========================================================================*/
#include "cmListFileCache.h"
#include "cmListFileLexer.h"
#include "cmSystemTools.h"
#include <cmsys/RegularExpression.hxx>
bool cmListFileCacheParseFunction(cmListFileLexer* lexer,
cmListFileFunction& function,
const char* filename);
cmListFileCache* cmListFileCache::Instance = 0;
@ -83,31 +89,83 @@ bool cmListFileCache::CacheFile(const char* path, bool requireProjectCommand)
{
return false;
}
std::ifstream fin(path);
if(!fin)
// Create the scanner.
cmListFileLexer* lexer = cmListFileLexer_New();
if(!lexer)
{
cmSystemTools::Error("cmListFileCache: error allocating lexer ");
return false;
}
// Open the file.
if(!cmListFileLexer_SetFileName(lexer, path))
{
cmListFileLexer_Delete(lexer);
cmSystemTools::Error("cmListFileCache: error can not open file ", path);
return false;
}
long line=0;
// Use a simple recursive-descent parser to process the token
// stream.
cmListFile inFile;
inFile.m_ModifiedTime = cmSystemTools::ModifiedTime(path);
bool parseError;
while ( fin )
bool parseError = false;
bool haveNewline = true;
cmListFileLexer_Token* token;
while(!parseError && (token = cmListFileLexer_Scan(lexer)))
{
cmListFileFunction inFunction;
if(cmListFileCache::ParseFunction(fin, inFunction, path, parseError,
line))
if(token->type == cmListFileLexer_Token_Newline)
{
inFunction.m_FilePath = path;
inFile.m_Functions.push_back(inFunction);
haveNewline = true;
}
if (parseError)
else if(token->type == cmListFileLexer_Token_Identifier)
{
inFile.m_ModifiedTime = 0;
if(haveNewline)
{
haveNewline = false;
cmListFileFunction inFunction;
inFunction.m_Name = token->text;
inFunction.m_FilePath = path;
inFunction.m_Line = token->line;
if(cmListFileCacheParseFunction(lexer, inFunction, path))
{
inFile.m_Functions.push_back(inFunction);
}
else
{
parseError = true;
}
}
else
{
cmOStringStream error;
error << "Error in cmake code at\n"
<< path << ":" << token->line << ":\n"
<< "Parse error. Expected a newline, got \""
<< token->text << "\".";
cmSystemTools::Error(error.str().c_str());
parseError = true;
}
}
else
{
cmOStringStream error;
error << "Error in cmake code at\n"
<< path << ":" << token->line << ":\n"
<< "Parse error. Expected a command name, got \""
<< token->text << "\".";
cmSystemTools::Error(error.str().c_str());
parseError = true;
}
}
if (parseError)
{
inFile.m_ModifiedTime = 0;
}
cmListFileLexer_Delete(lexer);
if(requireProjectCommand)
{
bool hasProject = false;
@ -146,224 +204,71 @@ void cmListFileCache::FlushCache(const char* path)
}
}
//----------------------------------------------------------------------------
inline bool cmListFileCachePreprocessLine(std::string& line)
bool cmListFileCacheParseFunction(cmListFileLexer* lexer,
cmListFileFunction& function,
const char* filename)
{
// Keep track of whether characters are inside a quoted argument.
bool quoted = false;
// Keep track of whether the line is blank.
bool blank = true;
// Loop over every character in the line.
std::string::iterator c;
for(c = line.begin(); c != line.end(); ++c)
{
if((*c == '\\') && (c < line.end()-1))
{
// A backslash escapes any character, so skip the next
// character.
++c;
// We have encountered a non-whitespace character.
blank = false;
}
else if(*c == '"')
{
// A double-quote either starts or ends a quoted argument.
quoted = !quoted;
// We have encountered a non-whitespace character.
blank = false;
}
else if(*c == '#' && !quoted)
{
// A pound character outside a double-quoted argument marks the
// rest of the line as a comment. Skip it.
break;
}
else if((*c != ' ') && (*c != '\t') && (*c != '\r'))
{
// We have encountered a non-whitespace character.
blank = false;
}
}
// Erase from the comment character to the end of the line. If no
// comment was present, both iterators are end() iterators and this
// does nothing.
line.erase(c, line.end());
// Return true if there is anything useful on this line.
return !blank;
}
//----------------------------------------------------------------------------
bool cmListFileCache::ParseFunction(std::ifstream& fin,
cmListFileFunction& function,
const char* filename,
bool& parseError,
long& line)
{
parseError = false;
std::string& name = function.m_Name;
std::vector<cmListFileArgument>& arguments = function.m_Arguments;
name = "";
arguments = std::vector<cmListFileArgument>();
std::string inbuffer;
if(!fin)
// Command name has already been parsed. Read the left paren.
cmListFileLexer_Token* token;
if(!(token = cmListFileLexer_Scan(lexer)))
{
cmOStringStream error;
error << "Error in cmake code at\n"
<< filename << ":" << cmListFileLexer_GetCurrentLine(lexer) << ":\n"
<< "Parse error. Function missing opening \"(\".";
cmSystemTools::Error(error.str().c_str());
return false;
}
if(cmSystemTools::GetLineFromStream(fin, inbuffer) )
if(token->type != cmListFileLexer_Token_ParenLeft)
{
// Count this line in line numbering.
++line;
// Preprocess the line to remove comments. Only use it if there
// is non-whitespace.
if(!cmListFileCachePreprocessLine(inbuffer))
{
return false;
}
// Regular expressions to match portions of a command invocation.
cmsys::RegularExpression oneLiner("^[ \t]*([A-Za-z_0-9]*)[ \t]*\\((.*)\\)[ \t\r]*$");
cmsys::RegularExpression multiLine("^[ \t]*([A-Za-z_0-9]*)[ \t]*\\((.*)$");
cmsys::RegularExpression lastLine("^(.*)\\)[ \t\r]*$");
cmOStringStream error;
error << "Error in cmake code at\n"
<< filename << ":" << cmListFileLexer_GetCurrentLine(lexer) << ":\n"
<< "Parse error. Expected \"(\", got \""
<< token->text << "\".";
cmSystemTools::Error(error.str().c_str());
return false;
}
// look for a oneline fun(arg arg2)
if(oneLiner.find(inbuffer.c_str()))
// Arguments.
while((token = cmListFileLexer_Scan(lexer)))
{
if(token->type == cmListFileLexer_Token_ParenRight)
{
// the arguments are the second match
std::string args = oneLiner.match(2);
name = oneLiner.match(1);
// break up the arguments
cmListFileCache::GetArguments(args, arguments);
function.m_Line = line;
return true;
}
// look for a start of a multiline with no trailing ")" fun(arg arg2
else if(multiLine.find(inbuffer.c_str()))
else if(token->type == cmListFileLexer_Token_Identifier ||
token->type == cmListFileLexer_Token_ArgumentUnquoted)
{
name = multiLine.match(1);
std::string args = multiLine.match(2);
cmListFileCache::GetArguments(args, arguments);
function.m_Line = line;
// Read lines until the closing paren is hit
bool done = false;
while(!done)
{
// read lines until the end paren is found
if(cmSystemTools::GetLineFromStream(fin, inbuffer) )
{
// Count this line in line numbering.
++line;
// Preprocess the line to remove comments. Only use it if there
// is non-whitespace.
if(!cmListFileCachePreprocessLine(inbuffer))
{
continue;
}
// Is this the last line?
if(lastLine.find(inbuffer.c_str()))
{
done = true;
std::string gargs = lastLine.match(1);
cmListFileCache::GetArguments(gargs, arguments);
}
else
{
cmListFileCache::GetArguments(inbuffer, arguments);
}
}
else
{
parseError = true;
cmOStringStream error;
error << "Error in cmake code at\n"
<< filename << ":" << line << ":\n"
<< "Parse error. Function missing ending \")\".";
cmSystemTools::Error(error.str().c_str());
return false;
}
}
return true;
cmListFileArgument a(cmSystemTools::RemoveEscapes(token->text),
false);
function.m_Arguments.push_back(a);
}
else
else if(token->type == cmListFileLexer_Token_ArgumentQuoted)
{
parseError = true;
cmListFileArgument a(cmSystemTools::RemoveEscapes(token->text),
true);
function.m_Arguments.push_back(a);
}
else if(token->type != cmListFileLexer_Token_Newline)
{
// Error.
cmOStringStream error;
error << "Error in cmake code at\n"
<< filename << ":" << line << ":\n"
<< "Parse error.";
<< filename << ":" << cmListFileLexer_GetCurrentLine(lexer) << ":\n"
<< "Parse error. Function missing ending \")\". "
<< "Instead found \"" << token->text << "\".";
cmSystemTools::Error(error.str().c_str());
return false;
}
}
cmOStringStream error;
error << "Error in cmake code at\n"
<< filename << ":" << cmListFileLexer_GetCurrentLine(lexer) << ":\n"
<< "Parse error. Function missing ending \")\". "
<< "End of file reached.";
cmSystemTools::Error(error.str().c_str());
return false;
}
void cmListFileCache::GetArguments(std::string& line,
std::vector<cmListFileArgument>& arguments)
{
// Match a normal argument (not quoted, no spaces).
cmsys::RegularExpression normalArgument("[ \t]*(([^ \t\r\\]|[\\].)+)[ \t\r]*");
// Match a quoted argument (surrounded by double quotes, spaces allowed).
cmsys::RegularExpression quotedArgument("[ \t]*(\"([^\"\\]|[\\].)*\")[ \t\r]*");
bool done = false;
while(!done)
{
std::string arg;
std::string::size_type endpos=0;
bool quoted = false;
bool foundQuoted = quotedArgument.find(line.c_str());
bool foundNormal = normalArgument.find(line.c_str());
if(foundQuoted && foundNormal)
{
// Both matches were found. Take the earlier one.
// Favor double-quoted version if there is a tie.
if(normalArgument.start(1) < quotedArgument.start(1))
{
arg = normalArgument.match(1);
endpos = normalArgument.end(1);
}
else
{
arg = quotedArgument.match(1);
endpos = quotedArgument.end(1);
// Strip off the double quotes on the ends.
arg = arg.substr(1, arg.length()-2);
quoted = true;
}
}
else if(foundQuoted)
{
arg = quotedArgument.match(1);
endpos = quotedArgument.end(1);
// Strip off the double quotes on the ends.
arg = arg.substr(1, arg.length()-2);
quoted = true;
}
else if(foundNormal)
{
arg = normalArgument.match(1);
endpos = normalArgument.end(1);
}
else
{
done = true;
}
if(!done)
{
cmListFileArgument a(cmSystemTools::RemoveEscapes(arg.c_str()), quoted);
arguments.push_back(a);
line = line.substr(endpos, line.length() - endpos);
}
}
}

View File

@ -79,25 +79,7 @@ public:
//! Flush cache file out of cache.
void FlushCache(const char* path);
/**
* Read a CMake command (or function) from an input file. This
* returns the name of the function and a list of its
* arguments. The last argument is the name of the file that
* the ifstream points to, and is used for debug info only.
*/
static bool ParseFunction(std::ifstream&, cmListFileFunction& function,
const char* filename, bool& parseError,
long& line);
/**
* Extract white-space separated arguments from a string.
* Double quoted strings are accepted with spaces.
* This is called by ParseFunction.
*/
static void GetArguments(std::string& line,
std::vector<cmListFileArgument>& arguments);
private:
// Cache the file
bool CacheFile(const char* path, bool requireProjectCommand);
@ -107,5 +89,4 @@ private:
static cmListFileCache* Instance; // singleton pointer
};
#endif

2216
Source/cmListFileLexer.c Normal file

File diff suppressed because it is too large Load Diff

60
Source/cmListFileLexer.h Normal file
View File

@ -0,0 +1,60 @@
/*=========================================================================
Program: CMake - Cross-Platform Makefile Generator
Module: $RCSfile$
Language: C++
Date: $Date$
Version: $Revision$
Copyright (c) 2002 Kitware, Inc., Insight Consortium. All rights reserved.
See Copyright.txt or http://www.cmake.org/HTML/Copyright.html for details.
This software is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the above copyright notices for more information.
=========================================================================*/
#ifndef cmListFileLexer_h
#define cmListFileLexer_h
/* Kinds of token the list file lexer can report in
   cmListFileLexer_Token::type. */
typedef enum cmListFileLexer_Type_e
{
/* No token: set by the lexer when the end of the input is reached. */
cmListFileLexer_Token_None,
/* A newline character outside of a quoted argument. */
cmListFileLexer_Token_Newline,
/* A name made of letters, digits and underscores that does not start
   with a digit (see the lexer rule for the exact pattern). */
cmListFileLexer_Token_Identifier,
/* The character '('. */
cmListFileLexer_Token_ParenLeft,
/* The character ')'. */
cmListFileLexer_Token_ParenRight,
/* A run of characters containing no whitespace, parentheses or double
   quotes; a backslash escapes the character that follows it. */
cmListFileLexer_Token_ArgumentUnquoted,
/* A double-quoted argument.  The token text holds the characters between
   the quotes, not the quotes themselves, and escape sequences are left
   as written. */
cmListFileLexer_Token_ArgumentQuoted,
/* A single character that no other lexer rule accepts. */
cmListFileLexer_Token_Error
} cmListFileLexer_Type;
/* One token as returned by cmListFileLexer_Scan.  The structure and the
   text it points to live inside the lexer, which rewrites them on the
   next scan; copy anything that must outlive that call. */
typedef struct cmListFileLexer_Token_s cmListFileLexer_Token;
struct cmListFileLexer_Token_s
{
/* Kind of token. */
cmListFileLexer_Type type;
/* Token text, null-terminated.  May be a null pointer when there is no
   token (end of input). */
char* text;
/* Number of characters in text, not counting the terminator. */
int length;
/* Line on which the token starts; the lexer begins counting at 1. */
int line;
/* Column at which the token starts; the lexer begins counting at 1. */
int column;
};
/* Opaque lexer state; create with cmListFileLexer_New and release with
   cmListFileLexer_Delete. */
typedef struct cmListFileLexer_s cmListFileLexer;

#ifdef __cplusplus
extern "C"
{
#endif

/* Allocate a lexer with no input file attached.  Returns a null pointer
   if the allocation fails.  Declared with (void) so that C callers get a
   real prototype instead of an unchecked old-style declaration. */
cmListFileLexer* cmListFileLexer_New(void);

/* Close the file currently being scanned, if any, and when the name is
   not null open that file for scanning.  Returns 1 on success (including
   the case of a null name) and 0 if the file cannot be opened. */
int cmListFileLexer_SetFileName(cmListFileLexer*, const char*);

/* Scan the next token.  Returns a pointer to the token stored inside the
   lexer, or a null pointer when no file is open or the end of the input
   has been reached.  Reaching the end of the input closes the file. */
cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer*);

/* Line number of the current scan position, or 0 when no file is open. */
long cmListFileLexer_GetCurrentLine(cmListFileLexer*);

/* Column number of the current scan position, or 0 when no file is
   open. */
long cmListFileLexer_GetCurrentColumn(cmListFileLexer*);

/* Close any open file and free the lexer object. */
void cmListFileLexer_Delete(cmListFileLexer*);

#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

326
Source/cmListFileLexer.l Normal file
View File

@ -0,0 +1,326 @@
%{
/*=========================================================================
Program: CMake - Cross-Platform Makefile Generator
Module: $RCSfile$
Language: C++
Date: $Date$
Version: $Revision$
Copyright (c) 2002 Kitware, Inc., Insight Consortium. All rights reserved.
See Copyright.txt or http://www.cmake.org/HTML/Copyright.html for details.
This software is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the above copyright notices for more information.
=========================================================================*/
/*
This file must be translated to C and modified to build everywhere.
Run flex like this:
flex -ocmListFileLexer.c cmListFileLexer.l
Modify cmListFileLexer.c:
- remove TABs
- remove the yyunput function
- add a statement "(void)yyscanner;" to the top of these methods:
yy_fatal_error, yyalloc, yyrealloc, yyfree
*/
/* Disable features we do not need. */
#define YY_NEVER_INTERACTIVE 1
#define YY_NO_UNPUT 1
#define ECHO
/* Setup the proper yylex declaration. */
#define YY_DECL int yylex (yyscan_t yyscanner, cmListFileLexer* lexer)
/* Disable some warnings. */
#if defined(_MSC_VER)
# pragma warning ( disable : 4127 )
# pragma warning ( disable : 4131 )
# pragma warning ( disable : 4244 )
# pragma warning ( disable : 4251 )
# pragma warning ( disable : 4267 )
# pragma warning ( disable : 4305 )
# pragma warning ( disable : 4309 )
# pragma warning ( disable : 4706 )
# pragma warning ( disable : 4786 )
#endif
#include "cmListFileLexer.h"
/*--------------------------------------------------------------------------*/
/* Private state of one lexer instance. */
struct cmListFileLexer_s
{
/* Most recently scanned token; cmListFileLexer_Scan returns its address. */
cmListFileLexer_Token token;
/* Line of the current scan position, starting at 1. */
int line;
/* Column of the current scan position, starting at 1. */
int column;
/* Number of bytes allocated for token.text, terminator included;
   0 when no buffer is allocated. */
int size;
/* Input file being scanned, or null when no file is open. */
FILE* file;
/* Reentrant flex scanner state; initialized when a file is opened and
   destroyed when it is closed. */
yyscan_t scanner;
};
static void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
int length);
static void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
int length);
/*--------------------------------------------------------------------------*/
%}
%option reentrant
%option yylineno
%option noyywrap
%pointer
%x STRING
%%
\n {
/* Newline outside a quoted argument.  The token is stored before the
   counters change, so it records the position of the newline itself;
   scanning then continues at column 1 of the next line. */
lexer->token.type = cmListFileLexer_Token_Newline;
cmListFileLexerSetToken(lexer, yytext, yyleng);
++lexer->line;
lexer->column = 1;
return 1;
}
#.* {
/* Comment running to the end of the line: produces no token, only the
   column advances.  The newline that follows is scanned separately. */
lexer->column += yyleng;
}
\( {
/* Opening parenthesis. */
lexer->token.type = cmListFileLexer_Token_ParenLeft;
cmListFileLexerSetToken(lexer, yytext, yyleng);
lexer->column += yyleng;
return 1;
}
\) {
/* Closing parenthesis. */
lexer->token.type = cmListFileLexer_Token_ParenRight;
cmListFileLexerSetToken(lexer, yytext, yyleng);
lexer->column += yyleng;
return 1;
}
[A-Za-z_][A-Za-z0-9_]* {
  /* Identifier: a letter or underscore followed by any number of
     letters, digits or underscores.  The tail is optional so that a
     one-character name is still an identifier; when the tail was
     mandatory such a name fell through to the unquoted-argument rule
     and could not be used as a command name.  This rule must stay ahead
     of the unquoted-argument rule because flex breaks equal-length ties
     in favour of the earlier rule. */
  lexer->token.type = cmListFileLexer_Token_Identifier;
  cmListFileLexerSetToken(lexer, yytext, yyleng);
  lexer->column += yyleng;
  return 1;
}
([^ \t\r\n\(\)\"\\]|\\.)+ {
/* Unquoted argument: a run of characters other than whitespace,
   parentheses and double quotes, where a backslash takes the following
   character with it.  The text is stored exactly as written. */
lexer->token.type = cmListFileLexer_Token_ArgumentUnquoted;
cmListFileLexerSetToken(lexer, yytext, yyleng);
lexer->column += yyleng;
return 1;
}
\" {
/* Opening double quote: start an empty quoted-argument token positioned
   at the quote and switch to the STRING state.  Nothing is returned
   until the closing quote has been seen. */
lexer->token.type = cmListFileLexer_Token_ArgumentQuoted;
cmListFileLexerSetToken(lexer, "", 0);
lexer->column += yyleng;
BEGIN(STRING);
}
<STRING>([^\\\n\"]|\\(.|\n))+ {
cmListFileLexerAppend(lexer, yytext, yyleng);
lexer->column += yyleng;
}
<STRING>\n {
/* Unescaped newline inside a quoted argument: it becomes part of the
   argument text and the position moves to the next line. */
cmListFileLexerAppend(lexer, yytext, yyleng);
++lexer->line;
lexer->column = 1;
}
<STRING>\" {
/* Closing double quote: leave the STRING state and deliver the quoted
   argument accumulated so far.  The quote itself is not appended. */
lexer->column += yyleng;
BEGIN(INITIAL);
return 1;
}
<STRING>. {
/* Any character inside a quoted argument that the rules above did not
   take; it is appended unchanged. */
cmListFileLexerAppend(lexer, yytext, yyleng);
lexer->column += yyleng;
}
[ \t\r] {
/* Whitespace between tokens produces no token. */
lexer->column += yyleng;
}
. {
/* Any other single character is reported as an Error token so the
   parser can show it in its message. */
lexer->token.type = cmListFileLexer_Token_Error;
cmListFileLexerSetToken(lexer, yytext, yyleng);
lexer->column += yyleng;
return 1;
}
<<EOF>> {
/* End of input: clear the token and return 0, which makes
   cmListFileLexer_Scan report that no token is left. */
lexer->token.type = cmListFileLexer_Token_None;
cmListFileLexerSetToken(lexer, 0, 0);
return 0;
}
%%
/*--------------------------------------------------------------------------*/
/* Replace the text of the current token and stamp it with the current
   line and column.
 
   lexer  - lexer whose token is updated.
   text   - new token text, or a null pointer to clear the token and
            release its buffer.
   length - number of bytes of text to store.
 
   Exactly length bytes are copied and a terminator is written after
   them, so the recorded length and buffer size always describe the real
   buffer even when the matched text contains a zero byte.  If memory
   cannot be obtained the token is left empty with no buffer.  */
void cmListFileLexerSetToken(cmListFileLexer* lexer, const char* text,
                             int length)
{
  /* Set the token line and column number. */
  lexer->token.line = lexer->line;
  lexer->token.column = lexer->column;

  /* No text: drop the buffer and leave an empty token. */
  if(!text)
    {
    free(lexer->token.text);
    lexer->token.text = 0;
    lexer->token.length = 0;
    lexer->size = 0;
    return;
    }

  /* Keep the existing buffer when the text and its terminator fit;
     otherwise replace it with one of exactly the needed size. */
  if(!lexer->token.text || length >= lexer->size)
    {
    free(lexer->token.text);
    lexer->token.text = (char*)malloc(length+1);
    if(!lexer->token.text)
      {
      lexer->token.length = 0;
      lexer->size = 0;
      return;
      }
    lexer->size = length+1;
    }

  memcpy(lexer->token.text, text, length);
  lexer->token.text[length] = 0;
  lexer->token.length = length;
}
/*--------------------------------------------------------------------------*/
/* Append text to the current token, growing its buffer when needed.
 
   lexer  - lexer whose token is extended.
   text   - bytes to append.
   length - number of bytes of text to append.
 
   Exactly length bytes are copied in both the in-place and the growing
   case, and the result is always terminated.  If the buffer cannot be
   grown the token keeps the text gathered so far.  */
void cmListFileLexerAppend(cmListFileLexer* lexer, const char* text,
                           int length)
{
  char* grown;
  int needed;

  /* Room for the existing text, the new text and the terminator. */
  needed = lexer->token.length + length + 1;
  if(!lexer->token.text || needed > lexer->size)
    {
    /* realloc preserves the old contents and acts like malloc when
       there is no buffer yet. */
    grown = (char*)realloc(lexer->token.text, needed);
    if(!grown)
      {
      return;
      }
    lexer->token.text = grown;
    lexer->size = needed;
    }

  memcpy(lexer->token.text + lexer->token.length, text, length);
  lexer->token.length += length;
  lexer->token.text[lexer->token.length] = 0;
}
/*--------------------------------------------------------------------------*/
/* Create a lexer with no input attached.  Every field starts out zero
   except the position, which starts at line 1, column 1.  Returns a
   null pointer if memory cannot be allocated. */
cmListFileLexer* cmListFileLexer_New()
{
  /* calloc hands back storage that is already zero-filled. */
  cmListFileLexer* created =
    (cmListFileLexer*)calloc(1, sizeof(cmListFileLexer));
  if(created)
    {
    created->line = 1;
    created->column = 1;
    }
  return created;
}
/*--------------------------------------------------------------------------*/
/* Destroy a lexer: close any file it still has open, release the token
   text buffer and free the lexer itself.  A null pointer is ignored.
 
   The token buffer is only released by the scanner when the end of the
   input is reached, so it has to be freed here for a lexer that is
   deleted part way through a file, for example after a parse error. */
void cmListFileLexer_Delete(cmListFileLexer* lexer)
{
  if(!lexer)
    {
    return;
    }
  cmListFileLexer_SetFileName(lexer, 0);
  free(lexer->token.text);
  free(lexer);
}
/*--------------------------------------------------------------------------*/
/* Select the file to scan.
 
   lexer - lexer to configure.
   name  - path of the file to open, or a null pointer to only close the
           file that is currently open.
 
   Any file already open is closed and its scanner destroyed first.
   Returns 1 on success, including the close-only case, and 0 if the
   file cannot be opened or the scanner cannot be initialized; in both
   failure cases the lexer is left with no open file.  Opening a file
   restarts the position at line 1, column 1.  */
int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name)
{
  /* Tear down whatever was being scanned before. */
  if(lexer->file)
    {
    yylex_destroy(lexer->scanner);
    fclose(lexer->file);
    lexer->file = 0;
    }

  /* Nothing to open: the caller only wanted the old file closed. */
  if(!name)
    {
    return 1;
    }

  lexer->file = fopen(name, "r");
  if(!lexer->file)
    {
    return 0;
    }

  /* A scanner that failed to initialize must never reach yylex, so
     treat that failure like a file that could not be opened. */
  if(yylex_init(&lexer->scanner) != 0)
    {
    fclose(lexer->file);
    lexer->file = 0;
    return 0;
    }
  yyset_in(lexer->file, lexer->scanner);

  /* Positions are reported relative to the start of this file. */
  lexer->line = 1;
  lexer->column = 1;
  return 1;
}
/*--------------------------------------------------------------------------*/
/* Scan the next token from the open file.  Returns the address of the
   token stored in the lexer, or a null pointer when no file is open or
   the scanner reports that the input is exhausted; in the latter case
   the file is closed before returning. */
cmListFileLexer_Token* cmListFileLexer_Scan(cmListFileLexer* lexer)
{
  if(lexer->file)
    {
    if(yylex(lexer->scanner, lexer) != 0)
      {
      return &lexer->token;
      }
    /* The scanner returned 0: no more input, so release the file. */
    cmListFileLexer_SetFileName(lexer, 0);
    }
  return 0;
}
/*--------------------------------------------------------------------------*/
/* Line of the current scan position while a file is open, 0 otherwise. */
long cmListFileLexer_GetCurrentLine(cmListFileLexer* lexer)
{
  return lexer->file ? lexer->line : 0;
}
/*--------------------------------------------------------------------------*/
/* Column of the current scan position while a file is open, 0
   otherwise. */
long cmListFileLexer_GetCurrentColumn(cmListFileLexer* lexer)
{
  return lexer->file ? lexer->column : 0;
}