From 18e639d48a658dff76f47e8fcb3815776ee35c33 Mon Sep 17 00:00:00 2001 From: Brad King Date: Mon, 13 Jul 2009 16:22:14 -0400 Subject: [PATCH] ENH: Provide unix-style command line parsing Add System_Parse_CommandForUnix to the KWSys System interface as a utility to parse a unix-style command line. Move the existing implementation out of ProcessUNIX. Add a flags argument reserved for future use in providing additional behavior. --- Source/kwsys/ProcessUNIX.c | 259 +--------------------------------- Source/kwsys/System.c | 277 +++++++++++++++++++++++++++++++++++++ Source/kwsys/System.h.in | 23 +++ 3 files changed, 303 insertions(+), 256 deletions(-) diff --git a/Source/kwsys/ProcessUNIX.c b/Source/kwsys/ProcessUNIX.c index 2d70fa399..69436095f 100644 --- a/Source/kwsys/ProcessUNIX.c +++ b/Source/kwsys/ProcessUNIX.c @@ -13,11 +13,13 @@ =========================================================================*/ #include "kwsysPrivate.h" #include KWSYS_HEADER(Process.h) +#include KWSYS_HEADER(System.h) /* Work-around CMake dependency scanning limitation. This must duplicate the above list of headers. */ #if 0 # include "Process.h.in" +# include "System.h.in" #endif /* @@ -187,7 +189,6 @@ static void kwsysProcessesSignalHandler(int signum, siginfo_t* info, #else static void kwsysProcessesSignalHandler(int signum); #endif -static char** kwsysProcessParseVerbatimCommand(const char* command); /*--------------------------------------------------------------------------*/ /* Structure containing data used to implement the child's execution. */ @@ -422,7 +423,7 @@ int kwsysProcess_AddCommand(kwsysProcess* cp, char const* const* command) /* In order to run the given command line verbatim we need to parse it. */ newCommands[cp->NumberOfCommands] = - kwsysProcessParseVerbatimCommand(*command); + kwsysSystem_Parse_CommandForUnix(*command, 0); if(!newCommands[cp->NumberOfCommands]) { /* Out of memory. 
*/ @@ -2729,257 +2730,3 @@ static void kwsysProcessesSignalHandler(int signum } #endif } - -/*--------------------------------------------------------------------------*/ -static int kwsysProcessAppendByte(char* local, - char** begin, char** end, - int* size, char c) -{ - /* Allocate space for the character. */ - if((*end - *begin) >= *size) - { - kwsysProcess_ptrdiff_t length = *end - *begin; - char* newBuffer = (char*)malloc((size_t)(*size*2)); - if(!newBuffer) - { - return 0; - } - memcpy(newBuffer, *begin, (size_t)(length)*sizeof(char)); - if(*begin != local) - { - free(*begin); - } - *begin = newBuffer; - *end = *begin + length; - *size *= 2; - } - - /* Store the character. */ - *(*end)++ = c; - return 1; -} - -/*--------------------------------------------------------------------------*/ -static int kwsysProcessAppendArgument(char** local, - char*** begin, char*** end, - int* size, - char* arg_local, - char** arg_begin, char** arg_end, - int* arg_size) -{ - /* Append a null-terminator to the argument string. */ - if(!kwsysProcessAppendByte(arg_local, arg_begin, arg_end, arg_size, '\0')) - { - return 0; - } - - /* Allocate space for the argument pointer. */ - if((*end - *begin) >= *size) - { - kwsysProcess_ptrdiff_t length = *end - *begin; - char** newPointers = (char**)malloc((size_t)(*size)*2*sizeof(char*)); - if(!newPointers) - { - return 0; - } - memcpy(newPointers, *begin, (size_t)(length)*sizeof(char*)); - if(*begin != local) - { - free(*begin); - } - *begin = newPointers; - *end = *begin + length; - *size *= 2; - } - - /* Allocate space for the argument string. */ - **end = (char*)malloc((size_t)(*arg_end - *arg_begin)); - if(!**end) - { - return 0; - } - - /* Store the argument in the command array. */ - memcpy(**end, *arg_begin,(size_t)(*arg_end - *arg_begin)); - ++(*end); - - /* Reset the argument to be empty. 
*/ - *arg_end = *arg_begin; - - return 1; -} - -/*--------------------------------------------------------------------------*/ -#define KWSYSPE_LOCAL_BYTE_COUNT 1024 -#define KWSYSPE_LOCAL_ARGS_COUNT 32 -static char** kwsysProcessParseVerbatimCommand(const char* command) -{ - /* Create a buffer for argument pointers during parsing. */ - char* local_pointers[KWSYSPE_LOCAL_ARGS_COUNT]; - int pointers_size = KWSYSPE_LOCAL_ARGS_COUNT; - char** pointer_begin = local_pointers; - char** pointer_end = pointer_begin; - - /* Create a buffer for argument strings during parsing. */ - char local_buffer[KWSYSPE_LOCAL_BYTE_COUNT]; - int buffer_size = KWSYSPE_LOCAL_BYTE_COUNT; - char* buffer_begin = local_buffer; - char* buffer_end = buffer_begin; - - /* Parse the command string. Try to behave like a UNIX shell. */ - char** newCommand = 0; - const char* c = command; - int in_argument = 0; - int in_escape = 0; - int in_single = 0; - int in_double = 0; - int failed = 0; - for(;*c; ++c) - { - if(in_escape) - { - /* This character is escaped so do no special handling. */ - if(!in_argument) - { - in_argument = 1; - } - if(!kwsysProcessAppendByte(local_buffer, &buffer_begin, - &buffer_end, &buffer_size, *c)) - { - failed = 1; - break; - } - in_escape = 0; - } - else if(*c == '\\' && !in_single) - { - /* The next character should be escaped. */ - in_escape = 1; - } - else if(*c == '\'' && !in_double) - { - /* Enter or exit single-quote state. */ - if(in_single) - { - in_single = 0; - } - else - { - in_single = 1; - if(!in_argument) - { - in_argument = 1; - } - } - } - else if(*c == '"' && !in_single) - { - /* Enter or exit double-quote state. */ - if(in_double) - { - in_double = 0; - } - else - { - in_double = 1; - if(!in_argument) - { - in_argument = 1; - } - } - } - else if(isspace((unsigned char) *c)) - { - if(in_argument) - { - if(in_single || in_double) - { - /* This space belongs to a quoted argument. 
*/ - if(!kwsysProcessAppendByte(local_buffer, &buffer_begin, - &buffer_end, &buffer_size, *c)) - { - failed = 1; - break; - } - } - else - { - /* This argument has been terminated by whitespace. */ - if(!kwsysProcessAppendArgument(local_pointers, &pointer_begin, - &pointer_end, &pointers_size, - local_buffer, &buffer_begin, - &buffer_end, &buffer_size)) - { - failed = 1; - break; - } - in_argument = 0; - } - } - } - else - { - /* This character belong to an argument. */ - if(!in_argument) - { - in_argument = 1; - } - if(!kwsysProcessAppendByte(local_buffer, &buffer_begin, - &buffer_end, &buffer_size, *c)) - { - failed = 1; - break; - } - } - } - - /* Finish the last argument. */ - if(in_argument) - { - if(!kwsysProcessAppendArgument(local_pointers, &pointer_begin, - &pointer_end, &pointers_size, - local_buffer, &buffer_begin, - &buffer_end, &buffer_size)) - { - failed = 1; - } - } - - /* If we still have memory allocate space for the new command - buffer. */ - if(!failed) - { - kwsysProcess_ptrdiff_t n = pointer_end - pointer_begin; - newCommand = (char**)malloc((size_t)(n+1)*sizeof(char*)); - } - - if(newCommand) - { - /* Copy the arguments into the new command buffer. */ - kwsysProcess_ptrdiff_t n = pointer_end - pointer_begin; - memcpy(newCommand, pointer_begin, sizeof(char*)*(size_t)(n)); - newCommand[n] = 0; - } - else - { - /* Free arguments already allocated. */ - while(pointer_end != pointer_begin) - { - free(*(--pointer_end)); - } - } - - /* Free temporary buffers. */ - if(pointer_begin != local_pointers) - { - free(pointer_begin); - } - if(buffer_begin != local_buffer) - { - free(buffer_begin); - } - - /* Return the final command buffer. 
*/ - return newCommand; -} - diff --git a/Source/kwsys/System.c b/Source/kwsys/System.c index 900b02507..b67ccb637 100644 --- a/Source/kwsys/System.c +++ b/Source/kwsys/System.c @@ -20,11 +20,19 @@ # include "System.h.in" #endif +#include <stddef.h> /* ptrdiff_t */ +#include <stdlib.h> /* malloc, free */ #include <string.h> /* strlen */ #include <ctype.h> /* isalpha */ #include <stdio.h> +#if defined(KWSYS_C_HAS_PTRDIFF_T) && KWSYS_C_HAS_PTRDIFF_T +typedef ptrdiff_t kwsysSystem_ptrdiff_t; +#else +typedef int kwsysSystem_ptrdiff_t; +#endif + /* Notes: @@ -579,3 +587,272 @@ int kwsysSystem_Shell_GetArgumentSizeForUnix(const char* in, int flags) { return kwsysSystem_Shell__GetArgumentSize(in, 1, flags); } + +/*--------------------------------------------------------------------------*/ +static int kwsysSystem__AppendByte(char* local, + char** begin, char** end, + int* size, char c) +{ + /* Allocate space for the character. */ + if((*end - *begin) >= *size) + { + kwsysSystem_ptrdiff_t length = *end - *begin; + char* newBuffer = (char*)malloc((size_t)(*size*2)); + if(!newBuffer) + { + return 0; + } + memcpy(newBuffer, *begin, (size_t)(length)*sizeof(char)); + if(*begin != local) + { + free(*begin); + } + *begin = newBuffer; + *end = *begin + length; + *size *= 2; + } + + /* Store the character. */ + *(*end)++ = c; + return 1; +} + +/*--------------------------------------------------------------------------*/ +static int kwsysSystem__AppendArgument(char** local, + char*** begin, char*** end, + int* size, + char* arg_local, + char** arg_begin, char** arg_end, + int* arg_size) +{ + /* Append a null-terminator to the argument string. */ + if(!kwsysSystem__AppendByte(arg_local, arg_begin, arg_end, arg_size, '\0')) + { + return 0; + } + + /* Allocate space for the argument pointer. 
*/ + if((*end - *begin) >= *size) + { + kwsysSystem_ptrdiff_t length = *end - *begin; + char** newPointers = (char**)malloc((size_t)(*size)*2*sizeof(char*)); + if(!newPointers) + { + return 0; + } + memcpy(newPointers, *begin, (size_t)(length)*sizeof(char*)); + if(*begin != local) + { + free(*begin); + } + *begin = newPointers; + *end = *begin + length; + *size *= 2; + } + + /* Allocate space for the argument string. */ + **end = (char*)malloc((size_t)(*arg_end - *arg_begin)); + if(!**end) + { + return 0; + } + + /* Store the argument in the command array. */ + memcpy(**end, *arg_begin,(size_t)(*arg_end - *arg_begin)); + ++(*end); + + /* Reset the argument to be empty. */ + *arg_end = *arg_begin; + + return 1; +} + +/*--------------------------------------------------------------------------*/ +#define KWSYSPE_LOCAL_BYTE_COUNT 1024 +#define KWSYSPE_LOCAL_ARGS_COUNT 32 +static char** kwsysSystem__ParseUnixCommand(const char* command, int flags) +{ + /* Create a buffer for argument pointers during parsing. */ + char* local_pointers[KWSYSPE_LOCAL_ARGS_COUNT]; + int pointers_size = KWSYSPE_LOCAL_ARGS_COUNT; + char** pointer_begin = local_pointers; + char** pointer_end = pointer_begin; + + /* Create a buffer for argument strings during parsing. */ + char local_buffer[KWSYSPE_LOCAL_BYTE_COUNT]; + int buffer_size = KWSYSPE_LOCAL_BYTE_COUNT; + char* buffer_begin = local_buffer; + char* buffer_end = buffer_begin; + + /* Parse the command string. Try to behave like a UNIX shell. */ + char** newCommand = 0; + const char* c = command; + int in_argument = 0; + int in_escape = 0; + int in_single = 0; + int in_double = 0; + int failed = 0; + for(;*c; ++c) + { + if(in_escape) + { + /* This character is escaped so do no special handling. 
*/ + if(!in_argument) + { + in_argument = 1; + } + if(!kwsysSystem__AppendByte(local_buffer, &buffer_begin, + &buffer_end, &buffer_size, *c)) + { + failed = 1; + break; + } + in_escape = 0; + } + else if(*c == '\\' && !in_single) + { + /* The next character should be escaped. */ + in_escape = 1; + } + else if(*c == '\'' && !in_double) + { + /* Enter or exit single-quote state. */ + if(in_single) + { + in_single = 0; + } + else + { + in_single = 1; + if(!in_argument) + { + in_argument = 1; + } + } + } + else if(*c == '"' && !in_single) + { + /* Enter or exit double-quote state. */ + if(in_double) + { + in_double = 0; + } + else + { + in_double = 1; + if(!in_argument) + { + in_argument = 1; + } + } + } + else if(isspace((unsigned char) *c)) + { + if(in_argument) + { + if(in_single || in_double) + { + /* This space belongs to a quoted argument. */ + if(!kwsysSystem__AppendByte(local_buffer, &buffer_begin, + &buffer_end, &buffer_size, *c)) + { + failed = 1; + break; + } + } + else + { + /* This argument has been terminated by whitespace. */ + if(!kwsysSystem__AppendArgument(local_pointers, &pointer_begin, + &pointer_end, &pointers_size, + local_buffer, &buffer_begin, + &buffer_end, &buffer_size)) + { + failed = 1; + break; + } + in_argument = 0; + } + } + } + else + { + /* This character belong to an argument. */ + if(!in_argument) + { + in_argument = 1; + } + if(!kwsysSystem__AppendByte(local_buffer, &buffer_begin, + &buffer_end, &buffer_size, *c)) + { + failed = 1; + break; + } + } + } + + /* Finish the last argument. */ + if(in_argument) + { + if(!kwsysSystem__AppendArgument(local_pointers, &pointer_begin, + &pointer_end, &pointers_size, + local_buffer, &buffer_begin, + &buffer_end, &buffer_size)) + { + failed = 1; + } + } + + /* If we still have memory allocate space for the new command + buffer. 
*/ + if(!failed) + { + kwsysSystem_ptrdiff_t n = pointer_end - pointer_begin; + newCommand = (char**)malloc((size_t)(n+1)*sizeof(char*)); + } + + if(newCommand) + { + /* Copy the arguments into the new command buffer. */ + kwsysSystem_ptrdiff_t n = pointer_end - pointer_begin; + memcpy(newCommand, pointer_begin, sizeof(char*)*(size_t)(n)); + newCommand[n] = 0; + } + else + { + /* Free arguments already allocated. */ + while(pointer_end != pointer_begin) + { + free(*(--pointer_end)); + } + } + + /* Free temporary buffers. */ + if(pointer_begin != local_pointers) + { + free(pointer_begin); + } + if(buffer_begin != local_buffer) + { + free(buffer_begin); + } + + /* The flags argument is currently unused. */ + (void)flags; + + /* Return the final command buffer. */ + return newCommand; +} + +/*--------------------------------------------------------------------------*/ +char** kwsysSystem_Parse_CommandForUnix(const char* command, int flags) +{ + /* Validate the flags. */ + if(flags != 0) + { + return 0; + } + + /* Forward to our internal implementation. */ + return kwsysSystem__ParseUnixCommand(command, flags); +} diff --git a/Source/kwsys/System.h.in b/Source/kwsys/System.h.in index f96c74e6f..842ebbdba 100644 --- a/Source/kwsys/System.h.in +++ b/Source/kwsys/System.h.in @@ -25,6 +25,7 @@ # define kwsysEXPORT @KWSYS_NAMESPACE@_EXPORT #endif #if !@KWSYS_NAMESPACE@_NAME_IS_KWSYS +# define kwsysSystem_Parse_CommandForUnix kwsys_ns(System_Parse_CommandForUnix) # define kwsysSystem_Shell_GetArgumentForWindows kwsys_ns(System_Shell_GetArgumentForWindows) # define kwsysSystem_Shell_GetArgumentForUnix kwsys_ns(System_Shell_GetArgumentForUnix) # define kwsysSystem_Shell_GetArgumentSizeForWindows kwsys_ns(System_Shell_GetArgumentSizeForWindows) @@ -113,6 +114,27 @@ enum kwsysSystem_Shell_Flag_e kwsysSystem_Shell_Flag_AllowMakeVariables = (1<<5) }; +/** + * Parse a unix-style command line string into separate arguments. 
+ * + * On success, returns a pointer to an array of pointers to individual + * argument strings. Each string is null-terminated and the last + * entry in the array is a NULL pointer (just like argv). It is the + * caller's responsibility to free() the strings and the array of + * pointers to them. + * + * On failure, returns NULL. Failure occurs only on invalid flags or + * when memory cannot be allocated; never due to content of the input + * string. Missing close-quotes are treated as if the necessary + * closing quote appears. + * + * By default single- and double-quoted arguments are supported, and + * any character may be escaped by a backslash. The flags argument is + * reserved for future use, and must be zero (or the call will fail). + */ +kwsysEXPORT char** kwsysSystem_Parse_CommandForUnix(const char* command, + int flags); + #if defined(__cplusplus) } /* extern "C" */ #endif @@ -123,6 +145,7 @@ enum kwsysSystem_Shell_Flag_e # undef kwsys_ns # undef kwsysEXPORT # if !defined(KWSYS_NAMESPACE) && !@KWSYS_NAMESPACE@_NAME_IS_KWSYS +# undef kwsysSystem_Parse_CommandForUnix # undef kwsysSystem_Shell_GetArgumentForWindows # undef kwsysSystem_Shell_GetArgumentForUnix # undef kwsysSystem_Shell_GetArgumentSizeForWindows