From f1ea7e88dc8322d4344e6b7acde6bf7bcffd7806 Mon Sep 17 00:00:00 2001 From: Brad King Date: Mon, 21 Aug 2006 14:17:58 -0400 Subject: [PATCH] ENH: Exposed pattern->regex API. Cleaned up and commented implementation of pattern->regex conversion. --- Source/kwsys/Glob.cxx | 183 ++++++++++++++++++++++----------------- Source/kwsys/Glob.hxx.in | 18 ++-- 2 files changed, 112 insertions(+), 89 deletions(-) diff --git a/Source/kwsys/Glob.cxx b/Source/kwsys/Glob.cxx index 15f0e7194..11cc596ad 100644 --- a/Source/kwsys/Glob.cxx +++ b/Source/kwsys/Glob.cxx @@ -39,14 +39,14 @@ #include namespace KWSYS_NAMESPACE { -#if defined( _WIN32 ) || defined( APPLE ) || defined( __CYGWIN__ ) - // On Windows and apple, no difference between lower and upper case - #define KWSYS_GLOB_CASE_INDEPENDENT +#if defined(_WIN32) || defined(APPLE) || defined(__CYGWIN__) +// On Windows and apple, no difference between lower and upper case +# define KWSYS_GLOB_CASE_INDEPENDENT #endif -#if defined( _WIN32 ) || defined( __CYGWIN__ ) - // Handle network paths - #define KWSYS_GLOB_SUPPORT_NETWORK_PATHS +#if defined(_WIN32) || defined(__CYGWIN__) +// Handle network paths +# define KWSYS_GLOB_SUPPORT_NETWORK_PATHS #endif //---------------------------------------------------------------------------- @@ -55,7 +55,6 @@ class GlobInternals public: kwsys_stl::vector Files; kwsys_stl::vector Expressions; - kwsys_stl::vector TextExpressions; }; //---------------------------------------------------------------------------- @@ -72,27 +71,6 @@ Glob::~Glob() delete this->Internals; } -//---------------------------------------------------------------------------- -void Glob::Escape(int ch, char* buffer) -{ - if (! 
( - 'a' <= ch && ch <= 'z' || - 'A' <= ch && ch <= 'Z' || - '0' <= ch && ch <= '9') ) - { - sprintf(buffer, "\\%c", ch); - } - else - { -#if defined( KWSYS_GLOB_CASE_INDEPENDENT ) - // On Windows and apple, no difference between lower and upper case - sprintf(buffer, "%c", tolower(ch)); -#else - sprintf(buffer, "%c", ch); -#endif - } -} - //---------------------------------------------------------------------------- kwsys_stl::vector& Glob::GetFiles() { @@ -100,82 +78,126 @@ kwsys_stl::vector& Glob::GetFiles() } //---------------------------------------------------------------------------- -kwsys_stl::string Glob::ConvertExpression(const kwsys_stl::string& expr) +kwsys_stl::string Glob::PatternToRegex(const kwsys_stl::string& pattern, + bool require_whole_string) { - - kwsys_stl::string::size_type i = 0; - kwsys_stl::string::size_type n = expr.size(); - - kwsys_stl::string res = "^"; - kwsys_stl::string stuff = ""; - - while ( i < n ) + // Incrementally build the regular expression from the pattern. + kwsys_stl::string regex = require_whole_string? "^" : ""; + kwsys_stl::string::const_iterator pattern_first = pattern.begin(); + kwsys_stl::string::const_iterator pattern_last = pattern.end(); + for(kwsys_stl::string::const_iterator i = pattern_first; + i != pattern_last; ++i) { - int c = expr[i]; - i = i+1; - if ( c == '*' ) + int c = *i; + if(c == '*') { - res = res + ".*"; + // A '*' (not between brackets) matches any string. + regex += ".*"; } - else if ( c == '?' ) + else if(c == '?') { - res = res + "."; + // A '?' (not between brackets) matches any single character. + regex += "."; } - else if ( c == '[' ) + else if(c == '[') { - kwsys_stl::string::size_type j = i; - if ( j < n && ( expr[j] == '!' || expr[j] == '^' ) ) + // Parse out the bracket expression. It begins just after the + // opening character. 
+ kwsys_stl::string::const_iterator bracket_first = i+1; + kwsys_stl::string::const_iterator bracket_last = bracket_first; + + // The first character may be complementation '!' or '^'. + if(bracket_last != pattern_last && + (*bracket_last == '!' || *bracket_last == '^')) { - j = j+1; + ++bracket_last; } - if ( j < n && expr[j] == ']' ) + + // If the next character is a ']' it is included in the brackets + // because the bracket string may not be empty. + if(bracket_last != pattern_last && *bracket_last == ']') { - j = j+1; + ++bracket_last; } - while ( j < n && expr[j] != ']' ) + + // Search for the closing ']'. + while(bracket_last != pattern_last && *bracket_last != ']') { - j = j+1; + ++bracket_last; } - if ( j >= n ) + + // Check whether we have a complete bracket string. + if(bracket_last == pattern_last) { - res = res + "\\["; + // The bracket string did not end, so it was opened simply by + // a '[' that is supposed to be matched literally. + regex += "\\["; } else { - stuff = ""; - kwsys_stl::string::size_type cc; - for ( cc = i; cc < j; cc ++ ) + // Convert the bracket string to its regex equivalent. + kwsys_stl::string::const_iterator k = bracket_first; + + // Open the regex block. + regex += "["; + + // A regex range complement uses '^' instead of '!'. + if(k != bracket_last && *k == '!') { - if ( expr[cc] == '\\' ) + regex += "^"; + ++k; + } + + // Convert the remaining characters. + for(; k != bracket_last; ++k) + { + // Backslashes must be escaped. + if(*k == '\\') { - stuff += "\\\\"; - } - else - { - stuff += expr[cc]; + regex += "\\"; } + + // Store this character. + regex += *k; } - i = j+1; - if ( stuff[0] == '!' || stuff[0] == '^' ) - { - stuff = '^' + stuff.substr(1); - } - else if ( stuff[0] == '^' ) - { - stuff = '\\' + stuff; - } - res = res + "[" + stuff + "]"; + + // Close the regex block. + regex += "]"; + + // Jump to the end of the bracket string. 
+ i = bracket_last; } } else { - char buffer[100]; - buffer[0] = 0; - this->Escape(c, buffer); - res = res + buffer; + // A single character matches itself. + int ch = c; + if(!(('a' <= ch && ch <= 'z') || + ('A' <= ch && ch <= 'Z') || + ('0' <= ch && ch <= '9'))) + { + // Escape the non-alphanumeric character. + regex += "\\"; + } +#if defined(KWSYS_GLOB_CASE_INDEPENDENT) + else + { + // On case-insensitive systems file names are converted to lower + // case before matching. + ch = tolower(ch); + } +#endif + + // Store the character. + regex.append(1, static_cast<char>(ch)); } } - return res + "$"; + + if(require_whole_string) + { + regex += "$"; + } + return regex; } //---------------------------------------------------------------------------- @@ -276,8 +298,8 @@ void Glob::ProcessDirectory(kwsys_stl::string::size_type start, realname = dir + "/" + fname; } -#if defined( KWSYS_GLOB_CASE_INDEPENDENT ) - // On Windows and apple, no difference between lower and upper case +#if defined(KWSYS_GLOB_CASE_INDEPENDENT) + // On case-insensitive file systems convert to lower case for matching. fname = kwsys::SystemTools::LowerCase(fname); #endif @@ -427,8 +449,7 @@ void Glob::AddExpression(const char* expr) { this->Internals->Expressions.push_back( kwsys::RegularExpression( - this->ConvertExpression(expr).c_str())); - this->Internals->TextExpressions.push_back(this->ConvertExpression(expr)); + this->PatternToRegex(expr).c_str())); } //---------------------------------------------------------------------------- diff --git a/Source/kwsys/Glob.hxx.in b/Source/kwsys/Glob.hxx.in index 728276fca..6bb740daf 100644 --- a/Source/kwsys/Glob.hxx.in +++ b/Source/kwsys/Glob.hxx.in @@ -61,6 +61,16 @@ public: void SetRelative(const char* dir); const char* GetRelative(); + /** Convert the given globbing pattern to a regular expression. + There is no way to quote meta-characters. 
The + require_whole_string argument specifies whether the regex is + automatically surrounded by "^" and "$" to match the whole + string. This is on by default because patterns always match + whole strings, but may be disabled to support concatenating + expressions more easily (regex1|regex2|etc). */ + static kwsys_stl::string PatternToRegex(const kwsys_stl::string& pattern, + bool require_whole_string = true); + protected: //! Process directory void ProcessDirectory(kwsys_stl::string::size_type start, @@ -71,14 +81,6 @@ protected: void RecurseDirectory(kwsys_stl::string::size_type start, const kwsys_stl::string& dir, bool dir_only); - //! Escape all non-alphanumeric characters in pattern. - void Escape(int ch, char* buffer); - - //! - // Translate a shell PATTERN to a regular expression. - // There is no way to quote meta-characters. - kwsys_stl::string ConvertExpression(const kwsys_stl::string& expr); - //! Add regular expression void AddExpression(const char* expr);