From bcd25808ac540d279a36a9e0f1b60f152fc0d6bb Mon Sep 17 00:00:00 2001 From: Brad King Date: Fri, 25 Jan 2013 16:30:30 -0500 Subject: [PATCH] ExternalData: Improve series matching using an explicit syntax Automatic series recognition can generate false positives too easily when the default series configuration is flexible enough to handle common cases. Avoid false positives by requiring an explicit syntax to activate series recognition. Choose the syntax DATA{<name>,:} to be short, simple, and look like a vertical ellipsis. This allows us to improve the default series match configuration. Allow series references to contain one of the numbered file names. Allow '-' as a separator in addition to '.' and '_'. Document what the default configuration matches. Also provide more options to configure series parsing. --- Modules/ExternalData.cmake | 82 +++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 27 deletions(-) diff --git a/Modules/ExternalData.cmake b/Modules/ExternalData.cmake index d678cf2fa..0ef514c1f 100644 --- a/Modules/ExternalData.cmake +++ b/Modules/ExternalData.cmake @@ -66,18 +66,26 @@ # the source tree contains a content link such as "MyInput.png.md5" then the # "MyData" target creates a real "MyInput.png" in the build tree. # -# The DATA{} syntax can automatically recognize and fetch a file series. If -# the source tree contains a group of files or content links named like a -# series then a DATA{} reference to one member adds rules to fetch all of -# them. Although all members of a series are fetched, only the file -# originally named by the DATA{} argument is substituted for it. Two -# variables configure recognition of a series from DATA{<name>}. First, -# ExternalData_SERIES_PARSE is a regex of the form "^(...)(...)(...)$" to -# parse <base>, <number>, and <suffix> parts from <name>. Second, -# ExternalData_SERIES_MATCH is a regex matching the <number> part of series -# members named <base><number><suffix>. Note that the <suffix> of a series -# does not include a hash-algorithm extension.
Both series configuration -# variables have default values that work well for common cases. +# The DATA{} syntax can be told to fetch a file series using the form +# "DATA{<name>,:}", where the ":" is literal. If the source tree contains a +# group of files or content links named like a series then a reference to one +# member adds rules to fetch all of them. Although all members of a series +# are fetched, only the file originally named by the DATA{} argument is +# substituted for it. The default configuration recognizes file series names +# ending with "#.ext", "_#.ext", ".#.ext", or "-#.ext" where "#" is a sequence +# of decimal digits and ".ext" is any single extension. Configure it with a +# regex that parses <number> and <suffix> parts from the end of <name>: +# ExternalData_SERIES_PARSE = regex of the form (<number>)(<suffix>)$ +# For more complicated cases set: +# ExternalData_SERIES_PARSE = regex with at least two () groups +# ExternalData_SERIES_PARSE_PREFIX = <prefix> regex group number, if any +# ExternalData_SERIES_PARSE_NUMBER = <number> regex group number +# ExternalData_SERIES_PARSE_SUFFIX = <suffix> regex group number +# Configure series number matching with a regex that matches the +# <number> part of series members named <prefix><number><suffix>: +# ExternalData_SERIES_MATCH = regex matching <number> in all series members +# Note that the <suffix> of a series does not include a hash-algorithm +# extension. # # The DATA{} syntax can alternatively match files associated with the named # file and contained in the same directory. Associated files may be specified @@ -349,6 +357,7 @@ function(_ExternalData_arg target arg options var_file) set(have_original 0) # Process options. + set(series_option "") set(associated_files "") set(associated_regex "") foreach(opt ${options}) @@ -356,6 +365,9 @@ function(_ExternalData_arg target arg options var_file) # Regular expression to match associated files. string(REGEX REPLACE "^REGEX:" "" regex "${opt}") list(APPEND associated_regex "${regex}") + elseif("x${opt}" MATCHES "^x:$") + # Activate series matching.
+ set(series_option "${opt}") elseif("x${opt}" MATCHES "^[^][:/*?]+$") # Specific associated file. list(APPEND associated_files "${opt}") @@ -365,16 +377,19 @@ function(_ExternalData_arg target arg options var_file) endif() endforeach() - if(associated_files OR associated_regex) - # Load the named data file and listed/matching associated files. - _ExternalData_arg_single() - _ExternalData_arg_associated() - elseif("${reldata}" MATCHES "(^|/)[^/.]+$") - # Files with no extension cannot be a series. - _ExternalData_arg_single() - else() - # Match a whole file series by default. + if(series_option) + if(associated_files OR associated_regex) + message(FATAL_ERROR "Series option \"${series_option}\" not allowed with associated files.") + endif() + # Load a whole file series. _ExternalData_arg_series() + else() + # Load the named data file. + _ExternalData_arg_single() + if(associated_files OR associated_regex) + # Load listed/matching associated files. + _ExternalData_arg_associated() + endif() endif() if(NOT have_original) @@ -430,27 +445,40 @@ endmacro() macro(_ExternalData_arg_series) # Configure series parsing and matching. 
+ set(series_parse_prefix "") + set(series_parse_number "\\1") + set(series_parse_suffix "\\2") if(ExternalData_SERIES_PARSE) - if(NOT "${ExternalData_SERIES_PARSE}" MATCHES - "^\\^\\([^()]*\\)\\([^()]*\\)\\([^()]*\\)\\$$") + if(ExternalData_SERIES_PARSE_NUMBER AND ExternalData_SERIES_PARSE_SUFFIX) + if(ExternalData_SERIES_PARSE_PREFIX) + set(series_parse_prefix "\\${ExternalData_SERIES_PARSE_PREFIX}") + endif() + set(series_parse_number "\\${ExternalData_SERIES_PARSE_NUMBER}") + set(series_parse_suffix "\\${ExternalData_SERIES_PARSE_SUFFIX}") + elseif(NOT "x${ExternalData_SERIES_PARSE}" MATCHES "^x\\([^()]*\\)\\([^()]*\\)\\$$") message(FATAL_ERROR "ExternalData_SERIES_PARSE is set to\n" " ${ExternalData_SERIES_PARSE}\n" "which is not of the form\n" - " ^(...)(...)(...)$\n") + " ()()$\n" + "Fix the regular expression or set variables\n" + " ExternalData_SERIES_PARSE_PREFIX = regex group number, if any\n" + " ExternalData_SERIES_PARSE_NUMBER = regex group number\n" + " ExternalData_SERIES_PARSE_SUFFIX = regex group number\n" + ) endif() set(series_parse "${ExternalData_SERIES_PARSE}") else() - set(series_parse "^(.*)()(\\.[^./]*)$") + set(series_parse "([0-9]*)(\\.[^./]*)$") endif() if(ExternalData_SERIES_MATCH) set(series_match "${ExternalData_SERIES_MATCH}") else() - set(series_match "[_.]?[0-9]*") + set(series_match "[_.-]?[0-9]*") endif() # Parse the base, number, and extension components of the series. - string(REGEX REPLACE "${series_parse}" "\\1;\\2;\\3" tuple "${reldata}") + string(REGEX REPLACE "${series_parse}" "${series_parse_prefix};${series_parse_number};${series_parse_suffix}" tuple "${reldata}") list(LENGTH tuple len) if(NOT "${len}" EQUAL 3) message(FATAL_ERROR "Data file referenced by argument\n"