ENH: Exposed pattern->regex API. Cleaned up and commented implementation of pattern->regex conversion.

This commit is contained in:
Brad King 2006-08-21 14:17:58 -04:00
parent 07fa9ac09c
commit f1ea7e88dc
2 changed files with 112 additions and 89 deletions

View File

@@ -39,14 +39,14 @@
#include <string.h> #include <string.h>
namespace KWSYS_NAMESPACE namespace KWSYS_NAMESPACE
{ {
#if defined( _WIN32 ) || defined( APPLE ) || defined( __CYGWIN__ ) #if defined(_WIN32) || defined(APPLE) || defined(__CYGWIN__)
// On Windows and apple, no difference between lower and upper case // On Windows and apple, no difference between lower and upper case
#define KWSYS_GLOB_CASE_INDEPENDENT # define KWSYS_GLOB_CASE_INDEPENDENT
#endif #endif
#if defined( _WIN32 ) || defined( __CYGWIN__ ) #if defined(_WIN32) || defined(__CYGWIN__)
// Handle network paths // Handle network paths
#define KWSYS_GLOB_SUPPORT_NETWORK_PATHS # define KWSYS_GLOB_SUPPORT_NETWORK_PATHS
#endif #endif
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
@@ -55,7 +55,6 @@ class GlobInternals
public: public:
kwsys_stl::vector<kwsys_stl::string> Files; kwsys_stl::vector<kwsys_stl::string> Files;
kwsys_stl::vector<kwsys::RegularExpression> Expressions; kwsys_stl::vector<kwsys::RegularExpression> Expressions;
kwsys_stl::vector<kwsys_stl::string> TextExpressions;
}; };
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
@@ -72,27 +71,6 @@ Glob::~Glob()
delete this->Internals; delete this->Internals;
} }
//----------------------------------------------------------------------------
// Write the regex-safe spelling of a single pattern character into
// `buffer` as a NUL-terminated string.
//
//   ch     - the character to emit.
//   buffer - destination; at most three bytes are written (a backslash,
//            the character, and the terminating NUL).
//
// ASCII letters and digits are written as-is (folded to lower case when
// KWSYS_GLOB_CASE_INDEPENDENT is defined); every other character is
// written with a leading backslash.
void Glob::Escape(int ch, char* buffer)
{
  const bool is_alphanumeric =
    ('a' <= ch && ch <= 'z') ||
    ('A' <= ch && ch <= 'Z') ||
    ('0' <= ch && ch <= '9');

  if (is_alphanumeric)
    {
#if defined( KWSYS_GLOB_CASE_INDEPENDENT )
    // Case-independent configuration: emit the lower-case form.
    sprintf(buffer, "%c", tolower(ch));
#else
    sprintf(buffer, "%c", ch);
#endif
    return;
    }

  // Anything that is not a letter or digit is backslash-escaped.
  sprintf(buffer, "\\%c", ch);
}
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
kwsys_stl::vector<kwsys_stl::string>& Glob::GetFiles() kwsys_stl::vector<kwsys_stl::string>& Glob::GetFiles()
{ {
@@ -100,82 +78,126 @@ kwsys_stl::vector<kwsys_stl::string>& Glob::GetFiles()
} }
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
kwsys_stl::string Glob::ConvertExpression(const kwsys_stl::string& expr) kwsys_stl::string Glob::PatternToRegex(const kwsys_stl::string& pattern,
bool require_whole_string)
{ {
// Incrementally build the regular expression from the pattern.
kwsys_stl::string::size_type i = 0; kwsys_stl::string regex = require_whole_string? "^" : "";
kwsys_stl::string::size_type n = expr.size(); kwsys_stl::string::const_iterator pattern_first = pattern.begin();
kwsys_stl::string::const_iterator pattern_last = pattern.end();
kwsys_stl::string res = "^"; for(kwsys_stl::string::const_iterator i = pattern_first;
kwsys_stl::string stuff = ""; i != pattern_last; ++i)
while ( i < n )
{ {
int c = expr[i]; int c = *i;
i = i+1; if(c == '*')
if ( c == '*' )
{ {
res = res + ".*"; // A '*' (not between brackets) matches any string.
regex += ".*";
} }
else if ( c == '?' ) else if(c == '?')
{ {
res = res + "."; // A '?' (not between brackets) matches any single character.
regex += ".";
} }
else if ( c == '[' ) else if(c == '[')
{ {
kwsys_stl::string::size_type j = i; // Parse out the bracket expression. It begins just after the
if ( j < n && ( expr[j] == '!' || expr[j] == '^' ) ) // opening character.
kwsys_stl::string::const_iterator bracket_first = i+1;
kwsys_stl::string::const_iterator bracket_last = bracket_first;
// The first character may be complementation '!' or '^'.
if(bracket_last != pattern_last &&
(*bracket_last == '!' || *bracket_last == '^'))
{ {
j = j+1; ++bracket_last;
} }
if ( j < n && expr[j] == ']' )
// If the next character is a ']' it is included in the brackets
// because the bracket string may not be empty.
if(bracket_last != pattern_last && *bracket_last == ']')
{ {
j = j+1; ++bracket_last;
} }
while ( j < n && expr[j] != ']' )
// Search for the closing ']'.
while(bracket_last != pattern_last && *bracket_last != ']')
{ {
j = j+1; ++bracket_last;
} }
if ( j >= n )
// Check whether we have a complete bracket string.
if(bracket_last == pattern_last)
{ {
res = res + "\\["; // The bracket string did not end, so it was opened simply by
// a '[' that is supposed to be matched literally.
regex += "\\[";
} }
else else
{ {
stuff = ""; // Convert the bracket string to its regex equivalent.
kwsys_stl::string::size_type cc; kwsys_stl::string::const_iterator k = bracket_first;
for ( cc = i; cc < j; cc ++ )
// Open the regex block.
regex += "[";
// A regex range complement uses '^' instead of '!'.
if(k != bracket_last && *k == '!')
{ {
if ( expr[cc] == '\\' ) regex += "^";
++k;
}
// Convert the remaining characters.
for(; k != bracket_last; ++k)
{
// Backslashes must be escaped.
if(*k == '\\')
{ {
stuff += "\\\\"; regex += "\\";
}
else
{
stuff += expr[cc];
} }
// Store this character.
regex += *k;
} }
i = j+1;
if ( stuff[0] == '!' || stuff[0] == '^' ) // Close the regex block.
{ regex += "]";
stuff = '^' + stuff.substr(1);
} // Jump to the end of the bracket string.
else if ( stuff[0] == '^' ) i = bracket_last;
{
stuff = '\\' + stuff;
}
res = res + "[" + stuff + "]";
} }
} }
else else
{ {
char buffer[100]; // A single character matches itself.
buffer[0] = 0; int ch = c;
this->Escape(c, buffer); if(!(('a' <= ch && ch <= 'z') ||
res = res + buffer; ('A' <= ch && ch <= 'Z') ||
('0' <= ch && ch <= '9')))
{
// Escape the non-alphanumeric character.
regex += "\\";
}
#if defined(KWSYS_GLOB_CASE_INDEPENDENT)
else
{
// On case-insensitive systems file names are converted to lower
// case before matching.
ch = tolower(ch);
}
#endif
// Store the character.
regex.append(1, static_cast<char>(ch));
} }
} }
return res + "$";
if(require_whole_string)
{
regex += "$";
}
return regex;
} }
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
@@ -276,8 +298,8 @@ void Glob::ProcessDirectory(kwsys_stl::string::size_type start,
realname = dir + "/" + fname; realname = dir + "/" + fname;
} }
#if defined( KWSYS_GLOB_CASE_INDEPENDENT ) #if defined(KWSYS_GLOB_CASE_INDEPENDENT)
// On Windows and apple, no difference between lower and upper case // On case-insensitive file systems convert to lower case for matching.
fname = kwsys::SystemTools::LowerCase(fname); fname = kwsys::SystemTools::LowerCase(fname);
#endif #endif
@@ -427,8 +449,7 @@ void Glob::AddExpression(const char* expr)
{ {
this->Internals->Expressions.push_back( this->Internals->Expressions.push_back(
kwsys::RegularExpression( kwsys::RegularExpression(
this->ConvertExpression(expr).c_str())); this->PatternToRegex(expr).c_str()));
this->Internals->TextExpressions.push_back(this->ConvertExpression(expr));
} }
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------

View File

@@ -61,6 +61,16 @@ public:
void SetRelative(const char* dir); void SetRelative(const char* dir);
const char* GetRelative(); const char* GetRelative();
/** Convert the given globbing pattern to a regular expression.
There is no way to quote meta-characters. The
require_whole_string argument specifies whether the regex is
automatically surrounded by "^" and "$" to match the whole
string. This is on by default because patterns always match
whole strings, but may be disabled to support concatenating
expressions more easily (regex1|regex2|etc). */
static kwsys_stl::string PatternToRegex(const kwsys_stl::string& pattern,
bool require_whole_string = true);
protected: protected:
//! Process directory //! Process directory
void ProcessDirectory(kwsys_stl::string::size_type start, void ProcessDirectory(kwsys_stl::string::size_type start,
@@ -71,14 +81,6 @@ protected:
void RecurseDirectory(kwsys_stl::string::size_type start, void RecurseDirectory(kwsys_stl::string::size_type start,
const kwsys_stl::string& dir, bool dir_only); const kwsys_stl::string& dir, bool dir_only);
//! Escape all non-alphanumeric characters in pattern.
void Escape(int ch, char* buffer);
//!
// Translate a shell PATTERN to a regular expression.
// There is no way to quote meta-characters.
kwsys_stl::string ConvertExpression(const kwsys_stl::string& expr);
//! Add regular expression //! Add regular expression
void AddExpression(const char* expr); void AddExpression(const char* expr);