ENH: Exposed pattern->regex API. Cleaned up and commented implementation of pattern->regex conversion.
This commit is contained in:
parent
07fa9ac09c
commit
f1ea7e88dc
|
@ -39,14 +39,14 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
namespace KWSYS_NAMESPACE
|
namespace KWSYS_NAMESPACE
|
||||||
{
|
{
|
||||||
#if defined( _WIN32 ) || defined( APPLE ) || defined( __CYGWIN__ )
|
#if defined(_WIN32) || defined(APPLE) || defined(__CYGWIN__)
|
||||||
// On Windows and Apple platforms, file names do not distinguish lower and upper case
|
// On Windows and Apple platforms, file names do not distinguish lower and upper case
|
||||||
#define KWSYS_GLOB_CASE_INDEPENDENT
|
# define KWSYS_GLOB_CASE_INDEPENDENT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined( _WIN32 ) || defined( __CYGWIN__ )
|
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||||
// Handle network paths
|
// Handle network paths
|
||||||
#define KWSYS_GLOB_SUPPORT_NETWORK_PATHS
|
# define KWSYS_GLOB_SUPPORT_NETWORK_PATHS
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
//----------------------------------------------------------------------------
|
||||||
|
@ -55,7 +55,6 @@ class GlobInternals
|
||||||
public:
|
public:
|
||||||
kwsys_stl::vector<kwsys_stl::string> Files;
|
kwsys_stl::vector<kwsys_stl::string> Files;
|
||||||
kwsys_stl::vector<kwsys::RegularExpression> Expressions;
|
kwsys_stl::vector<kwsys::RegularExpression> Expressions;
|
||||||
kwsys_stl::vector<kwsys_stl::string> TextExpressions;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
//----------------------------------------------------------------------------
|
||||||
|
@ -72,27 +71,6 @@ Glob::~Glob()
|
||||||
delete this->Internals;
|
delete this->Internals;
|
||||||
}
|
}
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
void Glob::Escape(int ch, char* buffer)
|
|
||||||
{
|
|
||||||
if (! (
|
|
||||||
'a' <= ch && ch <= 'z' ||
|
|
||||||
'A' <= ch && ch <= 'Z' ||
|
|
||||||
'0' <= ch && ch <= '9') )
|
|
||||||
{
|
|
||||||
sprintf(buffer, "\\%c", ch);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
#if defined( KWSYS_GLOB_CASE_INDEPENDENT )
|
|
||||||
// On Windows and apple, no difference between lower and upper case
|
|
||||||
sprintf(buffer, "%c", tolower(ch));
|
|
||||||
#else
|
|
||||||
sprintf(buffer, "%c", ch);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
//----------------------------------------------------------------------------
|
||||||
kwsys_stl::vector<kwsys_stl::string>& Glob::GetFiles()
|
kwsys_stl::vector<kwsys_stl::string>& Glob::GetFiles()
|
||||||
{
|
{
|
||||||
|
@ -100,82 +78,126 @@ kwsys_stl::vector<kwsys_stl::string>& Glob::GetFiles()
|
||||||
}
|
}
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
//----------------------------------------------------------------------------
|
||||||
kwsys_stl::string Glob::ConvertExpression(const kwsys_stl::string& expr)
|
kwsys_stl::string Glob::PatternToRegex(const kwsys_stl::string& pattern,
|
||||||
|
bool require_whole_string)
|
||||||
{
|
{
|
||||||
|
// Incrementally build the regular expression from the pattern.
|
||||||
kwsys_stl::string::size_type i = 0;
|
kwsys_stl::string regex = require_whole_string? "^" : "";
|
||||||
kwsys_stl::string::size_type n = expr.size();
|
kwsys_stl::string::const_iterator pattern_first = pattern.begin();
|
||||||
|
kwsys_stl::string::const_iterator pattern_last = pattern.end();
|
||||||
kwsys_stl::string res = "^";
|
for(kwsys_stl::string::const_iterator i = pattern_first;
|
||||||
kwsys_stl::string stuff = "";
|
i != pattern_last; ++i)
|
||||||
|
|
||||||
while ( i < n )
|
|
||||||
{
|
{
|
||||||
int c = expr[i];
|
int c = *i;
|
||||||
i = i+1;
|
if(c == '*')
|
||||||
if ( c == '*' )
|
|
||||||
{
|
{
|
||||||
res = res + ".*";
|
// A '*' (not between brackets) matches any string.
|
||||||
|
regex += ".*";
|
||||||
}
|
}
|
||||||
else if ( c == '?' )
|
else if(c == '?')
|
||||||
{
|
{
|
||||||
res = res + ".";
|
// A '?' (not between brackets) matches any single character.
|
||||||
|
regex += ".";
|
||||||
}
|
}
|
||||||
else if ( c == '[' )
|
else if(c == '[')
|
||||||
{
|
{
|
||||||
kwsys_stl::string::size_type j = i;
|
// Parse out the bracket expression. It begins just after the
|
||||||
if ( j < n && ( expr[j] == '!' || expr[j] == '^' ) )
|
// opening character.
|
||||||
|
kwsys_stl::string::const_iterator bracket_first = i+1;
|
||||||
|
kwsys_stl::string::const_iterator bracket_last = bracket_first;
|
||||||
|
|
||||||
|
// The first character may be complementation '!' or '^'.
|
||||||
|
if(bracket_last != pattern_last &&
|
||||||
|
(*bracket_last == '!' || *bracket_last == '^'))
|
||||||
{
|
{
|
||||||
j = j+1;
|
++bracket_last;
|
||||||
}
|
}
|
||||||
if ( j < n && expr[j] == ']' )
|
|
||||||
|
// If the next character is a ']' it is included in the brackets
|
||||||
|
// because the bracket string may not be empty.
|
||||||
|
if(bracket_last != pattern_last && *bracket_last == ']')
|
||||||
{
|
{
|
||||||
j = j+1;
|
++bracket_last;
|
||||||
}
|
}
|
||||||
while ( j < n && expr[j] != ']' )
|
|
||||||
|
// Search for the closing ']'.
|
||||||
|
while(bracket_last != pattern_last && *bracket_last != ']')
|
||||||
{
|
{
|
||||||
j = j+1;
|
++bracket_last;
|
||||||
}
|
}
|
||||||
if ( j >= n )
|
|
||||||
|
// Check whether we have a complete bracket string.
|
||||||
|
if(bracket_last == pattern_last)
|
||||||
{
|
{
|
||||||
res = res + "\\[";
|
// The bracket string did not end, so it was opened simply by
|
||||||
|
// a '[' that is supposed to be matched literally.
|
||||||
|
regex += "\\[";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
stuff = "";
|
// Convert the bracket string to its regex equivalent.
|
||||||
kwsys_stl::string::size_type cc;
|
kwsys_stl::string::const_iterator k = bracket_first;
|
||||||
for ( cc = i; cc < j; cc ++ )
|
|
||||||
|
// Open the regex block.
|
||||||
|
regex += "[";
|
||||||
|
|
||||||
|
// A regex range complement uses '^' instead of '!'.
|
||||||
|
if(k != bracket_last && *k == '!')
|
||||||
{
|
{
|
||||||
if ( expr[cc] == '\\' )
|
regex += "^";
|
||||||
|
++k;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert the remaining characters.
|
||||||
|
for(; k != bracket_last; ++k)
|
||||||
|
{
|
||||||
|
// Backslashes must be escaped.
|
||||||
|
if(*k == '\\')
|
||||||
{
|
{
|
||||||
stuff += "\\\\";
|
regex += "\\";
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
stuff += expr[cc];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Store this character.
|
||||||
|
regex += *k;
|
||||||
}
|
}
|
||||||
i = j+1;
|
|
||||||
if ( stuff[0] == '!' || stuff[0] == '^' )
|
// Close the regex block.
|
||||||
{
|
regex += "]";
|
||||||
stuff = '^' + stuff.substr(1);
|
|
||||||
}
|
// Jump to the end of the bracket string.
|
||||||
else if ( stuff[0] == '^' )
|
i = bracket_last;
|
||||||
{
|
|
||||||
stuff = '\\' + stuff;
|
|
||||||
}
|
|
||||||
res = res + "[" + stuff + "]";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
char buffer[100];
|
// A single character matches itself.
|
||||||
buffer[0] = 0;
|
int ch = c;
|
||||||
this->Escape(c, buffer);
|
if(!(('a' <= ch && ch <= 'z') ||
|
||||||
res = res + buffer;
|
('A' <= ch && ch <= 'Z') ||
|
||||||
|
('0' <= ch && ch <= '9')))
|
||||||
|
{
|
||||||
|
// Escape the non-alphanumeric character.
|
||||||
|
regex += "\\";
|
||||||
|
}
|
||||||
|
#if defined(KWSYS_GLOB_CASE_INDEPENDENT)
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// On case-insensitive systems file names are converted to lower
|
||||||
|
// case before matching.
|
||||||
|
ch = tolower(ch);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Store the character.
|
||||||
|
regex.append(1, static_cast<char>(ch));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res + "$";
|
|
||||||
|
if(require_whole_string)
|
||||||
|
{
|
||||||
|
regex += "$";
|
||||||
|
}
|
||||||
|
return regex;
|
||||||
}
|
}
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
//----------------------------------------------------------------------------
|
||||||
|
@ -276,8 +298,8 @@ void Glob::ProcessDirectory(kwsys_stl::string::size_type start,
|
||||||
realname = dir + "/" + fname;
|
realname = dir + "/" + fname;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined( KWSYS_GLOB_CASE_INDEPENDENT )
|
#if defined(KWSYS_GLOB_CASE_INDEPENDENT)
|
||||||
// On Windows and apple, no difference between lower and upper case
|
// On case-insensitive file systems convert to lower case for matching.
|
||||||
fname = kwsys::SystemTools::LowerCase(fname);
|
fname = kwsys::SystemTools::LowerCase(fname);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -427,8 +449,7 @@ void Glob::AddExpression(const char* expr)
|
||||||
{
|
{
|
||||||
this->Internals->Expressions.push_back(
|
this->Internals->Expressions.push_back(
|
||||||
kwsys::RegularExpression(
|
kwsys::RegularExpression(
|
||||||
this->ConvertExpression(expr).c_str()));
|
this->PatternToRegex(expr).c_str()));
|
||||||
this->Internals->TextExpressions.push_back(this->ConvertExpression(expr));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
//----------------------------------------------------------------------------
|
||||||
|
|
|
@ -61,6 +61,16 @@ public:
|
||||||
void SetRelative(const char* dir);
|
void SetRelative(const char* dir);
|
||||||
const char* GetRelative();
|
const char* GetRelative();
|
||||||
|
|
||||||
|
/** Convert the given globbing pattern to a regular expression.
|
||||||
|
There is no way to quote meta-characters. The
|
||||||
|
require_whole_string argument specifies whether the regex is
|
||||||
|
automatically surrounded by "^" and "$" to match the whole
|
||||||
|
string. This is on by default because patterns always match
|
||||||
|
whole strings, but may be disabled to support concatenating
|
||||||
|
expressions more easily (regex1|regex2|etc). */
|
||||||
|
static kwsys_stl::string PatternToRegex(const kwsys_stl::string& pattern,
|
||||||
|
bool require_whole_string = true);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
//! Process directory
|
//! Process directory
|
||||||
void ProcessDirectory(kwsys_stl::string::size_type start,
|
void ProcessDirectory(kwsys_stl::string::size_type start,
|
||||||
|
@ -71,14 +81,6 @@ protected:
|
||||||
void RecurseDirectory(kwsys_stl::string::size_type start,
|
void RecurseDirectory(kwsys_stl::string::size_type start,
|
||||||
const kwsys_stl::string& dir, bool dir_only);
|
const kwsys_stl::string& dir, bool dir_only);
|
||||||
|
|
||||||
//! Escape all non-alphanumeric characters in pattern.
|
|
||||||
void Escape(int ch, char* buffer);
|
|
||||||
|
|
||||||
//!
|
|
||||||
// Translate a shell PATTERN to a regular expression.
|
|
||||||
// There is no way to quote meta-characters.
|
|
||||||
kwsys_stl::string ConvertExpression(const kwsys_stl::string& expr);
|
|
||||||
|
|
||||||
//! Add a globbing pattern, stored as its equivalent regular expression
|
//! Add a globbing pattern, stored as its equivalent regular expression
|
||||||
void AddExpression(const char* expr);
|
void AddExpression(const char* expr);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue