ENH: Provide unix-style command line parsing

Add System_Parse_CommandForUnix to the KWSys System interface as a
utility to parse a unix-style command line.  Move the existing
implementation out of ProcessUNIX.  Add a flags argument reserved for
future use in providing additional behavior.
This commit is contained in:
Brad King 2009-07-13 16:22:14 -04:00
parent de6f88d06f
commit 18e639d48a
3 changed files with 303 additions and 256 deletions

View File

@ -13,11 +13,13 @@
=========================================================================*/
#include "kwsysPrivate.h"
#include KWSYS_HEADER(Process.h)
#include KWSYS_HEADER(System.h)
/* Work-around CMake dependency scanning limitation. This must
duplicate the above list of headers. */
#if 0
# include "Process.h.in"
# include "System.h.in"
#endif
/*
@ -187,7 +189,6 @@ static void kwsysProcessesSignalHandler(int signum, siginfo_t* info,
#else
static void kwsysProcessesSignalHandler(int signum);
#endif
static char** kwsysProcessParseVerbatimCommand(const char* command);
/*--------------------------------------------------------------------------*/
/* Structure containing data used to implement the child's execution. */
@ -422,7 +423,7 @@ int kwsysProcess_AddCommand(kwsysProcess* cp, char const* const* command)
/* In order to run the given command line verbatim we need to
parse it. */
newCommands[cp->NumberOfCommands] =
kwsysProcessParseVerbatimCommand(*command);
kwsysSystem_Parse_CommandForUnix(*command, 0);
if(!newCommands[cp->NumberOfCommands])
{
/* Out of memory. */
@ -2729,257 +2730,3 @@ static void kwsysProcessesSignalHandler(int signum
}
#endif
}
/*--------------------------------------------------------------------------*/
/* Append the byte `c` to a growable character buffer.

   The buffer starts out in caller-provided storage and is moved to heap
   memory of twice the capacity each time it is found to be full.

   local : the caller's initial storage; it is never passed to free().
   begin : in/out, start of the current buffer.
   end   : in/out, one past the last byte stored so far.
   size  : in/out, capacity of the current buffer in bytes.
   c     : the byte to store.

   Returns 1 on success.  Returns 0, leaving *begin, *end and *size
   untouched, when the buffer is full and cannot be grown.  */
static int kwsysProcessAppendByte(char* local,
                                  char** begin, char** end,
                                  int* size, char c)
{
  /* Allocate space for the character.  */
  if ((*end - *begin) >= *size) {
    /* Largest capacity whose double is still representable as an int.
       It is derived from unsigned arithmetic so that no additional
       header is needed.  */
    int const maxCapacity = (int)(~0u >> 2);
    size_t const length = (size_t)(*end - *begin);
    char* newBuffer;

    /* Doubling a non-positive capacity makes no room for the byte and
       doubling a huge one overflows the int bookkeeping.  Report both
       the same way as an allocation failure.  */
    if (*size < 1 || *size > maxCapacity) {
      return 0;
    }

    /* Perform the multiplication in size_t, not int.  */
    newBuffer = (char*)malloc((size_t)(*size) * 2);
    if (!newBuffer) {
      return 0;
    }
    memcpy(newBuffer, *begin, length * sizeof(char));

    /* Only heap buffers are released; the initial storage belongs to
       the caller.  */
    if (*begin != local) {
      free(*begin);
    }
    *begin = newBuffer;
    *end = *begin + length;
    *size *= 2;
  }

  /* Store the character.  */
  *(*end)++ = c;
  return 1;
}
/*--------------------------------------------------------------------------*/
/* Finish the argument currently held in the character buffer and add a
   heap-allocated copy of it to the growable array of argument pointers.

   local, begin, end, size :
     the pointer array: its initial (never freed) storage, its current
     start, one past its last used slot, and its capacity in slots.
   arg_local, arg_begin, arg_end, arg_size :
     the character buffer holding the argument text, described the same
     way with a capacity in bytes.

   On success the copy includes a terminating '\0', *end has advanced by
   one slot, the character buffer is reset to empty, and 1 is returned.
   Returns 0 if any allocation fails.  */
static int kwsysProcessAppendArgument(char** local,
                                      char*** begin, char*** end,
                                      int* size,
                                      char* arg_local,
                                      char** arg_begin, char** arg_end,
                                      int* arg_size)
{
  size_t argBytes;
  char* copy;

  /* The stored text needs its null-terminator before it is copied.  */
  if (kwsysProcessAppendByte(arg_local, arg_begin, arg_end,
                             arg_size, '\0') == 0) {
    return 0;
  }

  /* Double the pointer array when every slot is already in use.  */
  if (*size <= (*end - *begin)) {
    kwsysProcess_ptrdiff_t used = *end - *begin;
    char** grown = (char**)malloc((size_t)(*size) * 2 * sizeof(char*));
    if (grown == 0) {
      return 0;
    }
    memcpy(grown, *begin, (size_t)(used) * sizeof(char*));
    /* The initial storage is owned by the caller; only heap arrays are
       released here.  */
    if (*begin != local) {
      free(*begin);
    }
    *begin = grown;
    *end = grown + used;
    *size *= 2;
  }

  /* Duplicate the terminated argument text into its own allocation.
     The slot is written even when the allocation fails.  */
  argBytes = (size_t)(*arg_end - *arg_begin);
  copy = (char*)malloc(argBytes);
  **end = copy;
  if (copy == 0) {
    return 0;
  }
  memcpy(copy, *arg_begin, argBytes);
  ++(*end);

  /* Start the next argument with an empty character buffer.  */
  *arg_end = *arg_begin;
  return 1;
}
/*--------------------------------------------------------------------------*/
#define KWSYSPE_LOCAL_BYTE_COUNT 1024
#define KWSYSPE_LOCAL_ARGS_COUNT 32
/* Split a command line into an argv-style array, trying to behave like
   a UNIX shell.

   Unquoted, unescaped whitespace separates arguments.  Single and double
   quotes group text (including whitespace) into one argument.  A
   backslash makes the following character literal, except inside single
   quotes where the backslash itself is literal.

   Returns a malloc-ed array of malloc-ed strings terminated by a null
   pointer, or 0 if memory could not be allocated.  */
static char** kwsysProcessParseVerbatimCommand(const char* command)
{
  /* On-stack storage used until the parse outgrows it.  */
  char* stackArgs[KWSYSPE_LOCAL_ARGS_COUNT];
  char stackText[KWSYSPE_LOCAL_BYTE_COUNT];

  /* Growable array of finished argument strings.  */
  char** argsBegin = stackArgs;
  char** argsEnd = stackArgs;
  int argsCapacity = KWSYSPE_LOCAL_ARGS_COUNT;

  /* Growable buffer holding the text of the argument being built.  */
  char* textBegin = stackText;
  char* textEnd = stackText;
  int textCapacity = KWSYSPE_LOCAL_BYTE_COUNT;

  /* Parser state.  */
  char** result = 0;
  const char* cursor;
  int inArgument = 0;
  int escaped = 0;
  int singleQuoted = 0;
  int doubleQuoted = 0;
  int ok = 1;

  for (cursor = command; ok && *cursor; ++cursor) {
    char const ch = *cursor;
    int storeByte = 0;
    int finishArgument = 0;

    /* First decide what this character means...  */
    if (escaped) {
      /* The previous backslash makes this character literal.  */
      escaped = 0;
      inArgument = 1;
      storeByte = 1;
    } else if (ch == '\\' && !singleQuoted) {
      /* The next character should be escaped.  */
      escaped = 1;
    } else if (ch == '\'' && !doubleQuoted) {
      /* Toggle single-quote state; opening a quote starts an argument. */
      singleQuoted = !singleQuoted;
      if (singleQuoted) {
        inArgument = 1;
      }
    } else if (ch == '"' && !singleQuoted) {
      /* Toggle double-quote state; opening a quote starts an argument. */
      doubleQuoted = !doubleQuoted;
      if (doubleQuoted) {
        inArgument = 1;
      }
    } else if (isspace((unsigned char)ch)) {
      /* Whitespace outside of any argument is skipped.  Inside quotes
         it is ordinary text; otherwise it ends the argument.  */
      if (inArgument) {
        if (singleQuoted || doubleQuoted) {
          storeByte = 1;
        } else {
          finishArgument = 1;
        }
      }
    } else {
      /* An ordinary character belonging to an argument.  */
      inArgument = 1;
      storeByte = 1;
    }

    /* ...then carry out the one action that was selected, if any.  */
    if (storeByte) {
      if (!kwsysProcessAppendByte(stackText, &textBegin,
                                  &textEnd, &textCapacity, ch)) {
        ok = 0;
      }
    } else if (finishArgument) {
      if (kwsysProcessAppendArgument(stackArgs, &argsBegin,
                                     &argsEnd, &argsCapacity,
                                     stackText, &textBegin,
                                     &textEnd, &textCapacity)) {
        inArgument = 0;
      } else {
        ok = 0;
      }
    }
  }

  /* Finish the last argument.  */
  if (inArgument) {
    if (!kwsysProcessAppendArgument(stackArgs, &argsBegin,
                                    &argsEnd, &argsCapacity,
                                    stackText, &textBegin,
                                    &textEnd, &textCapacity)) {
      ok = 0;
    }
  }

  /* If nothing failed, allocate the array handed back to the caller
     with one extra slot for the terminating null pointer.  */
  if (ok) {
    kwsysProcess_ptrdiff_t count = argsEnd - argsBegin;
    result = (char**)malloc((size_t)(count + 1) * sizeof(char*));
  }

  if (result) {
    /* Hand ownership of the argument strings over to the result.  */
    kwsysProcess_ptrdiff_t count = argsEnd - argsBegin;
    memcpy(result, argsBegin, sizeof(char*) * (size_t)(count));
    result[count] = 0;
  } else {
    /* Nothing is returned, so release the arguments built so far.  */
    while (argsEnd != argsBegin) {
      --argsEnd;
      free(*argsEnd);
    }
  }

  /* Release the temporary buffers if they moved to the heap.  */
  if (argsBegin != stackArgs) {
    free(argsBegin);
  }
  if (textBegin != stackText) {
    free(textBegin);
  }

  return result;
}

View File

@ -20,11 +20,19 @@
# include "System.h.in"
#endif
#include <stddef.h> /* ptrdiff_t */
#include <stdlib.h> /* malloc, free */
#include <string.h> /* strlen */
#include <ctype.h> /* isalpha */
#include <stdio.h>
/* Integer type used in this file to hold the difference between two
   pointers.  It is ptrdiff_t when KWSYS_C_HAS_PTRDIFF_T is defined to a
   nonzero value, and int otherwise.  */
#if defined(KWSYS_C_HAS_PTRDIFF_T) && KWSYS_C_HAS_PTRDIFF_T
typedef ptrdiff_t kwsysSystem_ptrdiff_t;
#else
typedef int kwsysSystem_ptrdiff_t;
#endif
/*
Notes:
@ -579,3 +587,272 @@ int kwsysSystem_Shell_GetArgumentSizeForUnix(const char* in, int flags)
{
return kwsysSystem_Shell__GetArgumentSize(in, 1, flags);
}
/*--------------------------------------------------------------------------*/
/* Append the byte `c` to a growable character buffer.

   The buffer starts out in caller-provided storage and is moved to heap
   memory of twice the capacity each time it is found to be full.

   local : the caller's initial storage; it is never passed to free().
   begin : in/out, start of the current buffer.
   end   : in/out, one past the last byte stored so far.
   size  : in/out, capacity of the current buffer in bytes.
   c     : the byte to store.

   Returns 1 on success.  Returns 0, leaving *begin, *end and *size
   untouched, when the buffer is full and cannot be grown.  */
static int kwsysSystem__AppendByte(char* local,
                                   char** begin, char** end,
                                   int* size, char c)
{
  /* Allocate space for the character.  */
  if ((*end - *begin) >= *size) {
    /* Largest capacity whose double is still representable as an int.
       It is derived from unsigned arithmetic so that no additional
       header is needed.  */
    int const maxCapacity = (int)(~0u >> 2);
    size_t const length = (size_t)(*end - *begin);
    char* newBuffer;

    /* Doubling a non-positive capacity makes no room for the byte and
       doubling a huge one overflows the int bookkeeping.  Report both
       the same way as an allocation failure.  */
    if (*size < 1 || *size > maxCapacity) {
      return 0;
    }

    /* Perform the multiplication in size_t, not int.  */
    newBuffer = (char*)malloc((size_t)(*size) * 2);
    if (!newBuffer) {
      return 0;
    }
    memcpy(newBuffer, *begin, length * sizeof(char));

    /* Only heap buffers are released; the initial storage belongs to
       the caller.  */
    if (*begin != local) {
      free(*begin);
    }
    *begin = newBuffer;
    *end = *begin + length;
    *size *= 2;
  }

  /* Store the character.  */
  *(*end)++ = c;
  return 1;
}
/*--------------------------------------------------------------------------*/
/* Finish the argument currently held in the character buffer and add a
   heap-allocated copy of it to the growable array of argument pointers.

   local, begin, end, size :
     the pointer array: its initial (never freed) storage, its current
     start, one past its last used slot, and its capacity in slots.
   arg_local, arg_begin, arg_end, arg_size :
     the character buffer holding the argument text, described the same
     way with a capacity in bytes.

   On success the copy includes a terminating '\0', *end has advanced by
   one slot, the character buffer is reset to empty, and 1 is returned.
   Returns 0 if any allocation fails.  */
static int kwsysSystem__AppendArgument(char** local,
                                       char*** begin, char*** end,
                                       int* size,
                                       char* arg_local,
                                       char** arg_begin, char** arg_end,
                                       int* arg_size)
{
  size_t argBytes;
  char* copy;

  /* The stored text needs its null-terminator before it is copied.  */
  if (kwsysSystem__AppendByte(arg_local, arg_begin, arg_end,
                              arg_size, '\0') == 0) {
    return 0;
  }

  /* Double the pointer array when every slot is already in use.  */
  if (*size <= (*end - *begin)) {
    kwsysSystem_ptrdiff_t used = *end - *begin;
    char** grown = (char**)malloc((size_t)(*size) * 2 * sizeof(char*));
    if (grown == 0) {
      return 0;
    }
    memcpy(grown, *begin, (size_t)(used) * sizeof(char*));
    /* The initial storage is owned by the caller; only heap arrays are
       released here.  */
    if (*begin != local) {
      free(*begin);
    }
    *begin = grown;
    *end = grown + used;
    *size *= 2;
  }

  /* Duplicate the terminated argument text into its own allocation.
     The slot is written even when the allocation fails.  */
  argBytes = (size_t)(*arg_end - *arg_begin);
  copy = (char*)malloc(argBytes);
  **end = copy;
  if (copy == 0) {
    return 0;
  }
  memcpy(copy, *arg_begin, argBytes);
  ++(*end);

  /* Start the next argument with an empty character buffer.  */
  *arg_end = *arg_begin;
  return 1;
}
/*--------------------------------------------------------------------------*/
#define KWSYSPE_LOCAL_BYTE_COUNT 1024
#define KWSYSPE_LOCAL_ARGS_COUNT 32
/* Split a command line into an argv-style array, trying to behave like
   a UNIX shell.

   Unquoted, unescaped whitespace separates arguments.  Single and double
   quotes group text (including whitespace) into one argument.  A
   backslash makes the following character literal, except inside single
   quotes where the backslash itself is literal.

   The flags argument is accepted but currently ignored.

   Returns a malloc-ed array of malloc-ed strings terminated by a null
   pointer, or 0 if memory could not be allocated.  */
static char** kwsysSystem__ParseUnixCommand(const char* command, int flags)
{
  /* On-stack storage used until the parse outgrows it.  */
  char* stackArgs[KWSYSPE_LOCAL_ARGS_COUNT];
  char stackText[KWSYSPE_LOCAL_BYTE_COUNT];

  /* Growable array of finished argument strings.  */
  char** argsBegin = stackArgs;
  char** argsEnd = stackArgs;
  int argsCapacity = KWSYSPE_LOCAL_ARGS_COUNT;

  /* Growable buffer holding the text of the argument being built.  */
  char* textBegin = stackText;
  char* textEnd = stackText;
  int textCapacity = KWSYSPE_LOCAL_BYTE_COUNT;

  /* Parser state.  */
  char** result = 0;
  const char* cursor;
  int inArgument = 0;
  int escaped = 0;
  int singleQuoted = 0;
  int doubleQuoted = 0;
  int ok = 1;

  /* The flags argument is currently unused.  */
  (void)flags;

  for (cursor = command; ok && *cursor; ++cursor) {
    char const ch = *cursor;
    int storeByte = 0;
    int finishArgument = 0;

    /* First decide what this character means...  */
    if (escaped) {
      /* The previous backslash makes this character literal.  */
      escaped = 0;
      inArgument = 1;
      storeByte = 1;
    } else if (ch == '\\' && !singleQuoted) {
      /* The next character should be escaped.  */
      escaped = 1;
    } else if (ch == '\'' && !doubleQuoted) {
      /* Toggle single-quote state; opening a quote starts an argument. */
      singleQuoted = !singleQuoted;
      if (singleQuoted) {
        inArgument = 1;
      }
    } else if (ch == '"' && !singleQuoted) {
      /* Toggle double-quote state; opening a quote starts an argument. */
      doubleQuoted = !doubleQuoted;
      if (doubleQuoted) {
        inArgument = 1;
      }
    } else if (isspace((unsigned char)ch)) {
      /* Whitespace outside of any argument is skipped.  Inside quotes
         it is ordinary text; otherwise it ends the argument.  */
      if (inArgument) {
        if (singleQuoted || doubleQuoted) {
          storeByte = 1;
        } else {
          finishArgument = 1;
        }
      }
    } else {
      /* An ordinary character belonging to an argument.  */
      inArgument = 1;
      storeByte = 1;
    }

    /* ...then carry out the one action that was selected, if any.  */
    if (storeByte) {
      if (!kwsysSystem__AppendByte(stackText, &textBegin,
                                   &textEnd, &textCapacity, ch)) {
        ok = 0;
      }
    } else if (finishArgument) {
      if (kwsysSystem__AppendArgument(stackArgs, &argsBegin,
                                      &argsEnd, &argsCapacity,
                                      stackText, &textBegin,
                                      &textEnd, &textCapacity)) {
        inArgument = 0;
      } else {
        ok = 0;
      }
    }
  }

  /* Finish the last argument.  */
  if (inArgument) {
    if (!kwsysSystem__AppendArgument(stackArgs, &argsBegin,
                                     &argsEnd, &argsCapacity,
                                     stackText, &textBegin,
                                     &textEnd, &textCapacity)) {
      ok = 0;
    }
  }

  /* If nothing failed, allocate the array handed back to the caller
     with one extra slot for the terminating null pointer.  */
  if (ok) {
    kwsysSystem_ptrdiff_t count = argsEnd - argsBegin;
    result = (char**)malloc((size_t)(count + 1) * sizeof(char*));
  }

  if (result) {
    /* Hand ownership of the argument strings over to the result.  */
    kwsysSystem_ptrdiff_t count = argsEnd - argsBegin;
    memcpy(result, argsBegin, sizeof(char*) * (size_t)(count));
    result[count] = 0;
  } else {
    /* Nothing is returned, so release the arguments built so far.  */
    while (argsEnd != argsBegin) {
      --argsEnd;
      free(*argsEnd);
    }
  }

  /* Release the temporary buffers if they moved to the heap.  */
  if (argsBegin != stackArgs) {
    free(argsBegin);
  }
  if (textBegin != stackText) {
    free(textBegin);
  }

  return result;
}
/*--------------------------------------------------------------------------*/
/* Public entry point for unix-style command line parsing.

   command : the command line string to split into arguments.
   flags   : must be zero; any other value makes the call fail.

   Returns the result of the internal parser, or 0 without parsing when
   flags is nonzero.  */
char** kwsysSystem_Parse_CommandForUnix(const char* command, int flags)
{
  char** parsed = 0;

  /* Only a zero flags value is valid; otherwise fall through and
     return the null result.  */
  if (flags == 0) {
    parsed = kwsysSystem__ParseUnixCommand(command, flags);
  }
  return parsed;
}

View File

@ -25,6 +25,7 @@
# define kwsysEXPORT @KWSYS_NAMESPACE@_EXPORT
#endif
#if !@KWSYS_NAMESPACE@_NAME_IS_KWSYS
# define kwsysSystem_Parse_CommandForUnix kwsys_ns(System_Parse_CommandForUnix)
# define kwsysSystem_Shell_GetArgumentForWindows kwsys_ns(System_Shell_GetArgumentForWindows)
# define kwsysSystem_Shell_GetArgumentForUnix kwsys_ns(System_Shell_GetArgumentForUnix)
# define kwsysSystem_Shell_GetArgumentSizeForWindows kwsys_ns(System_Shell_GetArgumentSizeForWindows)
@ -113,6 +114,27 @@ enum kwsysSystem_Shell_Flag_e
kwsysSystem_Shell_Flag_AllowMakeVariables = (1<<5)
};
/**
 * Parse a unix-style command line string into separate arguments.
 *
 * The command pointer is read without a NULL check, so it must point to
 * a null-terminated string.  Arguments are separated by whitespace that
 * is neither quoted nor escaped.
 *
 * On success, returns a pointer to an array of pointers to individual
 * argument strings.  Each string is null-terminated and the last
 * entry in the array is a NULL pointer (just like argv).  It is the
 * caller's responsibility to free() the strings and the array of
 * pointers to them.
 *
 * On failure, returns NULL.  Failure occurs only on invalid flags or
 * when memory cannot be allocated; never due to content of the input
 * string.  Missing close-quotes are treated as if the necessary
 * closing quote appears.
 *
 * By default single- and double-quoted arguments are supported, and
 * any character may be escaped by a backslash, except inside single
 * quotes where a backslash is an ordinary character.  The flags
 * argument is reserved for future use, and must be zero (or the call
 * will fail).
 */
kwsysEXPORT char** kwsysSystem_Parse_CommandForUnix(const char* command,
int flags);
#if defined(__cplusplus)
} /* extern "C" */
#endif
@ -123,6 +145,7 @@ enum kwsysSystem_Shell_Flag_e
# undef kwsys_ns
# undef kwsysEXPORT
# if !defined(KWSYS_NAMESPACE) && !@KWSYS_NAMESPACE@_NAME_IS_KWSYS
# undef kwsysSystem_Parse_CommandForUnix
# undef kwsysSystem_Shell_GetArgumentForWindows
# undef kwsysSystem_Shell_GetArgumentForUnix
# undef kwsysSystem_Shell_GetArgumentSizeForWindows