ENH: Exposed pattern->regex API. Cleaned up and commented implementation of pattern->regex conversion.
This commit is contained in:
parent
07fa9ac09c
commit
f1ea7e88dc
|
@ -55,7 +55,6 @@ class GlobInternals
|
|||
public:
|
||||
kwsys_stl::vector<kwsys_stl::string> Files;
|
||||
kwsys_stl::vector<kwsys::RegularExpression> Expressions;
|
||||
kwsys_stl::vector<kwsys_stl::string> TextExpressions;
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
@ -72,27 +71,6 @@ Glob::~Glob()
|
|||
delete this->Internals;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
void Glob::Escape(int ch, char* buffer)
|
||||
{
|
||||
if (! (
|
||||
'a' <= ch && ch <= 'z' ||
|
||||
'A' <= ch && ch <= 'Z' ||
|
||||
'0' <= ch && ch <= '9') )
|
||||
{
|
||||
sprintf(buffer, "\\%c", ch);
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined( KWSYS_GLOB_CASE_INDEPENDENT )
|
||||
// On Windows and apple, no difference between lower and upper case
|
||||
sprintf(buffer, "%c", tolower(ch));
|
||||
#else
|
||||
sprintf(buffer, "%c", ch);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
kwsys_stl::vector<kwsys_stl::string>& Glob::GetFiles()
|
||||
{
|
||||
|
@ -100,82 +78,126 @@ kwsys_stl::vector<kwsys_stl::string>& Glob::GetFiles()
|
|||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
kwsys_stl::string Glob::ConvertExpression(const kwsys_stl::string& expr)
|
||||
kwsys_stl::string Glob::PatternToRegex(const kwsys_stl::string& pattern,
|
||||
bool require_whole_string)
|
||||
{
|
||||
|
||||
kwsys_stl::string::size_type i = 0;
|
||||
kwsys_stl::string::size_type n = expr.size();
|
||||
|
||||
kwsys_stl::string res = "^";
|
||||
kwsys_stl::string stuff = "";
|
||||
|
||||
while ( i < n )
|
||||
// Incrementally build the regular expression from the pattern.
|
||||
kwsys_stl::string regex = require_whole_string? "^" : "";
|
||||
kwsys_stl::string::const_iterator pattern_first = pattern.begin();
|
||||
kwsys_stl::string::const_iterator pattern_last = pattern.end();
|
||||
for(kwsys_stl::string::const_iterator i = pattern_first;
|
||||
i != pattern_last; ++i)
|
||||
{
|
||||
int c = expr[i];
|
||||
i = i+1;
|
||||
int c = *i;
|
||||
if(c == '*')
|
||||
{
|
||||
res = res + ".*";
|
||||
// A '*' (not between brackets) matches any string.
|
||||
regex += ".*";
|
||||
}
|
||||
else if(c == '?')
|
||||
{
|
||||
res = res + ".";
|
||||
// A '?' (not between brackets) matches any single character.
|
||||
regex += ".";
|
||||
}
|
||||
else if(c == '[')
|
||||
{
|
||||
kwsys_stl::string::size_type j = i;
|
||||
if ( j < n && ( expr[j] == '!' || expr[j] == '^' ) )
|
||||
// Parse out the bracket expression. It begins just after the
|
||||
// opening character.
|
||||
kwsys_stl::string::const_iterator bracket_first = i+1;
|
||||
kwsys_stl::string::const_iterator bracket_last = bracket_first;
|
||||
|
||||
// The first character may be complementation '!' or '^'.
|
||||
if(bracket_last != pattern_last &&
|
||||
(*bracket_last == '!' || *bracket_last == '^'))
|
||||
{
|
||||
j = j+1;
|
||||
++bracket_last;
|
||||
}
|
||||
if ( j < n && expr[j] == ']' )
|
||||
|
||||
// If the next character is a ']' it is included in the brackets
|
||||
// because the bracket string may not be empty.
|
||||
if(bracket_last != pattern_last && *bracket_last == ']')
|
||||
{
|
||||
j = j+1;
|
||||
++bracket_last;
|
||||
}
|
||||
while ( j < n && expr[j] != ']' )
|
||||
|
||||
// Search for the closing ']'.
|
||||
while(bracket_last != pattern_last && *bracket_last != ']')
|
||||
{
|
||||
j = j+1;
|
||||
++bracket_last;
|
||||
}
|
||||
if ( j >= n )
|
||||
|
||||
// Check whether we have a complete bracket string.
|
||||
if(bracket_last == pattern_last)
|
||||
{
|
||||
res = res + "\\[";
|
||||
// The bracket string did not end, so it was opened simply by
|
||||
// a '[' that is supposed to be matched literally.
|
||||
regex += "\\[";
|
||||
}
|
||||
else
|
||||
{
|
||||
stuff = "";
|
||||
kwsys_stl::string::size_type cc;
|
||||
for ( cc = i; cc < j; cc ++ )
|
||||
// Convert the bracket string to its regex equivalent.
|
||||
kwsys_stl::string::const_iterator k = bracket_first;
|
||||
|
||||
// Open the regex block.
|
||||
regex += "[";
|
||||
|
||||
// A regex range complement uses '^' instead of '!'.
|
||||
if(k != bracket_last && *k == '!')
|
||||
{
|
||||
if ( expr[cc] == '\\' )
|
||||
regex += "^";
|
||||
++k;
|
||||
}
|
||||
|
||||
// Convert the remaining characters.
|
||||
for(; k != bracket_last; ++k)
|
||||
{
|
||||
stuff += "\\\\";
|
||||
}
|
||||
else
|
||||
// Backslashes must be escaped.
|
||||
if(*k == '\\')
|
||||
{
|
||||
stuff += expr[cc];
|
||||
regex += "\\";
|
||||
}
|
||||
|
||||
// Store this character.
|
||||
regex += *k;
|
||||
}
|
||||
i = j+1;
|
||||
if ( stuff[0] == '!' || stuff[0] == '^' )
|
||||
{
|
||||
stuff = '^' + stuff.substr(1);
|
||||
}
|
||||
else if ( stuff[0] == '^' )
|
||||
{
|
||||
stuff = '\\' + stuff;
|
||||
}
|
||||
res = res + "[" + stuff + "]";
|
||||
|
||||
// Close the regex block.
|
||||
regex += "]";
|
||||
|
||||
// Jump to the end of the bracket string.
|
||||
i = bracket_last;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
char buffer[100];
|
||||
buffer[0] = 0;
|
||||
this->Escape(c, buffer);
|
||||
res = res + buffer;
|
||||
// A single character matches itself.
|
||||
int ch = c;
|
||||
if(!(('a' <= ch && ch <= 'z') ||
|
||||
('A' <= ch && ch <= 'Z') ||
|
||||
('0' <= ch && ch <= '9')))
|
||||
{
|
||||
// Escape the non-alphanumeric character.
|
||||
regex += "\\";
|
||||
}
|
||||
#if defined(KWSYS_GLOB_CASE_INDEPENDENT)
|
||||
else
|
||||
{
|
||||
// On case-insensitive systems file names are converted to lower
|
||||
// case before matching.
|
||||
ch = tolower(ch);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Store the character.
|
||||
regex.append(1, static_cast<char>(ch));
|
||||
}
|
||||
}
|
||||
return res + "$";
|
||||
|
||||
if(require_whole_string)
|
||||
{
|
||||
regex += "$";
|
||||
}
|
||||
return regex;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
@ -277,7 +299,7 @@ void Glob::ProcessDirectory(kwsys_stl::string::size_type start,
|
|||
}
|
||||
|
||||
#if defined(KWSYS_GLOB_CASE_INDEPENDENT)
|
||||
// On Windows and apple, no difference between lower and upper case
|
||||
// On case-insensitive file systems convert to lower case for matching.
|
||||
fname = kwsys::SystemTools::LowerCase(fname);
|
||||
#endif
|
||||
|
||||
|
@ -427,8 +449,7 @@ void Glob::AddExpression(const char* expr)
|
|||
{
|
||||
this->Internals->Expressions.push_back(
|
||||
kwsys::RegularExpression(
|
||||
this->ConvertExpression(expr).c_str()));
|
||||
this->Internals->TextExpressions.push_back(this->ConvertExpression(expr));
|
||||
this->PatternToRegex(expr).c_str()));
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
|
|
@ -61,6 +61,16 @@ public:
|
|||
void SetRelative(const char* dir);
|
||||
const char* GetRelative();
|
||||
|
||||
/** Convert the given globbing pattern to a regular expression.
|
||||
There is no way to quote meta-characters. The
|
||||
require_whole_string argument specifies whether the regex is
|
||||
automatically surrounded by "^" and "$" to match the whole
|
||||
string. This is on by default because patterns always match
|
||||
whole strings, but may be disabled to support concatenating
|
||||
expressions more easily (regex1|regex2|etc). */
|
||||
static kwsys_stl::string PatternToRegex(const kwsys_stl::string& pattern,
|
||||
bool require_whole_string = true);
|
||||
|
||||
protected:
|
||||
//! Process directory
|
||||
void ProcessDirectory(kwsys_stl::string::size_type start,
|
||||
|
@ -71,14 +81,6 @@ protected:
|
|||
void RecurseDirectory(kwsys_stl::string::size_type start,
|
||||
const kwsys_stl::string& dir, bool dir_only);
|
||||
|
||||
//! Escape all non-alphanumeric characters in pattern.
|
||||
void Escape(int ch, char* buffer);
|
||||
|
||||
//!
|
||||
// Translate a shell PATTERN to a regular expression.
|
||||
// There is no way to quote meta-characters.
|
||||
kwsys_stl::string ConvertExpression(const kwsys_stl::string& expr);
|
||||
|
||||
//! Add regular expression
|
||||
void AddExpression(const char* expr);
|
||||
|
||||
|
|
Loading…
Reference in New Issue