Merge branch 'update-expat' into release
This commit is contained in:
commit
cd1f0a824e
|
@ -973,9 +973,12 @@ XML_FreeContentModel(XML_Parser parser, XML_Content *model);
|
||||||
|
|
||||||
/* Exposing the memory handling functions used in Expat */
|
/* Exposing the memory handling functions used in Expat */
|
||||||
XMLPARSEAPI(void *)
|
XMLPARSEAPI(void *)
|
||||||
|
XML_ATTR_MALLOC
|
||||||
|
XML_ATTR_ALLOC_SIZE(2)
|
||||||
XML_MemMalloc(XML_Parser parser, size_t size);
|
XML_MemMalloc(XML_Parser parser, size_t size);
|
||||||
|
|
||||||
XMLPARSEAPI(void *)
|
XMLPARSEAPI(void *)
|
||||||
|
XML_ATTR_ALLOC_SIZE(3)
|
||||||
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
|
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
|
||||||
|
|
||||||
XMLPARSEAPI(void)
|
XMLPARSEAPI(void)
|
||||||
|
|
|
@ -67,12 +67,26 @@
|
||||||
#endif
|
#endif
|
||||||
#endif /* not defined XML_STATIC */
|
#endif /* not defined XML_STATIC */
|
||||||
|
|
||||||
|
#if !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4)
|
||||||
|
#define XMLIMPORT __attribute__ ((visibility ("default")))
|
||||||
|
#endif
|
||||||
|
|
||||||
/* If we didn't define it above, define it away: */
|
/* If we didn't define it above, define it away: */
|
||||||
#ifndef XMLIMPORT
|
#ifndef XMLIMPORT
|
||||||
#define XMLIMPORT
|
#define XMLIMPORT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
|
||||||
|
#define XML_ATTR_MALLOC __attribute__((__malloc__))
|
||||||
|
#else
|
||||||
|
#define XML_ATTR_MALLOC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
|
||||||
|
#define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x)))
|
||||||
|
#else
|
||||||
|
#define XML_ATTR_ALLOC_SIZE(x)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL
|
#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL
|
||||||
|
|
||||||
|
|
|
@ -71,3 +71,25 @@
|
||||||
#define inline
|
#define inline
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef UNUSED_P
|
||||||
|
# ifdef __GNUC__
|
||||||
|
# define UNUSED_P(p) UNUSED_ ## p __attribute__((__unused__))
|
||||||
|
# else
|
||||||
|
# define UNUSED_P(p) UNUSED_ ## p
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
|
@ -1730,7 +1730,8 @@ XML_GetBuffer(XML_Parser parser, int len)
|
||||||
#ifdef XML_CONTEXT_BYTES
|
#ifdef XML_CONTEXT_BYTES
|
||||||
int keep;
|
int keep;
|
||||||
#endif /* defined XML_CONTEXT_BYTES */
|
#endif /* defined XML_CONTEXT_BYTES */
|
||||||
int neededSize = len + (int)(bufferEnd - bufferPtr);
|
/* Do not invoke signed arithmetic overflow: */
|
||||||
|
int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
|
||||||
if (neededSize < 0) {
|
if (neededSize < 0) {
|
||||||
errorCode = XML_ERROR_NO_MEMORY;
|
errorCode = XML_ERROR_NO_MEMORY;
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -1761,7 +1762,8 @@ XML_GetBuffer(XML_Parser parser, int len)
|
||||||
if (bufferSize == 0)
|
if (bufferSize == 0)
|
||||||
bufferSize = INIT_BUFFER_SIZE;
|
bufferSize = INIT_BUFFER_SIZE;
|
||||||
do {
|
do {
|
||||||
bufferSize *= 2;
|
/* Do not invoke signed arithmetic overflow: */
|
||||||
|
bufferSize = (int) (2U * (unsigned) bufferSize);
|
||||||
} while (bufferSize < neededSize && bufferSize > 0);
|
} while (bufferSize < neededSize && bufferSize > 0);
|
||||||
if (bufferSize <= 0) {
|
if (bufferSize <= 0) {
|
||||||
errorCode = XML_ERROR_NO_MEMORY;
|
errorCode = XML_ERROR_NO_MEMORY;
|
||||||
|
@ -2462,11 +2464,11 @@ doContent(XML_Parser parser,
|
||||||
for (;;) {
|
for (;;) {
|
||||||
int bufSize;
|
int bufSize;
|
||||||
int convLen;
|
int convLen;
|
||||||
XmlConvert(enc,
|
const enum XML_Convert_Result convert_res = XmlConvert(enc,
|
||||||
&fromPtr, rawNameEnd,
|
&fromPtr, rawNameEnd,
|
||||||
(ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
|
(ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
|
||||||
convLen = (int)(toPtr - (XML_Char *)tag->buf);
|
convLen = (int)(toPtr - (XML_Char *)tag->buf);
|
||||||
if (fromPtr == rawNameEnd) {
|
if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
|
||||||
tag->name.strLen = convLen;
|
tag->name.strLen = convLen;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -2687,11 +2689,11 @@ doContent(XML_Parser parser,
|
||||||
if (MUST_CONVERT(enc, s)) {
|
if (MUST_CONVERT(enc, s)) {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
ICHAR *dataPtr = (ICHAR *)dataBuf;
|
ICHAR *dataPtr = (ICHAR *)dataBuf;
|
||||||
XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
|
const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
|
||||||
*eventEndPP = s;
|
*eventEndPP = s;
|
||||||
charDataHandler(handlerArg, dataBuf,
|
charDataHandler(handlerArg, dataBuf,
|
||||||
(int)(dataPtr - (ICHAR *)dataBuf));
|
(int)(dataPtr - (ICHAR *)dataBuf));
|
||||||
if (s == next)
|
if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
|
||||||
break;
|
break;
|
||||||
*eventPP = s;
|
*eventPP = s;
|
||||||
}
|
}
|
||||||
|
@ -3297,11 +3299,11 @@ doCdataSection(XML_Parser parser,
|
||||||
if (MUST_CONVERT(enc, s)) {
|
if (MUST_CONVERT(enc, s)) {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
ICHAR *dataPtr = (ICHAR *)dataBuf;
|
ICHAR *dataPtr = (ICHAR *)dataBuf;
|
||||||
XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
|
const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
|
||||||
*eventEndPP = next;
|
*eventEndPP = next;
|
||||||
charDataHandler(handlerArg, dataBuf,
|
charDataHandler(handlerArg, dataBuf,
|
||||||
(int)(dataPtr - (ICHAR *)dataBuf));
|
(int)(dataPtr - (ICHAR *)dataBuf));
|
||||||
if (s == next)
|
if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
|
||||||
break;
|
break;
|
||||||
*eventPP = s;
|
*eventPP = s;
|
||||||
}
|
}
|
||||||
|
@ -4960,9 +4962,9 @@ internalEntityProcessor(XML_Parser parser,
|
||||||
|
|
||||||
static enum XML_Error PTRCALL
|
static enum XML_Error PTRCALL
|
||||||
errorProcessor(XML_Parser parser,
|
errorProcessor(XML_Parser parser,
|
||||||
const char *s,
|
const char *UNUSED_P(s),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const char **nextPtr)
|
const char **UNUSED_P(nextPtr))
|
||||||
{
|
{
|
||||||
return errorCode;
|
return errorCode;
|
||||||
}
|
}
|
||||||
|
@ -5378,6 +5380,7 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
|
||||||
const char *s, const char *end)
|
const char *s, const char *end)
|
||||||
{
|
{
|
||||||
if (MUST_CONVERT(enc, s)) {
|
if (MUST_CONVERT(enc, s)) {
|
||||||
|
enum XML_Convert_Result convert_res;
|
||||||
const char **eventPP;
|
const char **eventPP;
|
||||||
const char **eventEndPP;
|
const char **eventEndPP;
|
||||||
if (enc == encoding) {
|
if (enc == encoding) {
|
||||||
|
@ -5390,11 +5393,11 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
|
||||||
}
|
}
|
||||||
do {
|
do {
|
||||||
ICHAR *dataPtr = (ICHAR *)dataBuf;
|
ICHAR *dataPtr = (ICHAR *)dataBuf;
|
||||||
XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
|
convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
|
||||||
*eventEndPP = s;
|
*eventEndPP = s;
|
||||||
defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
|
defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
|
||||||
*eventPP = s;
|
*eventPP = s;
|
||||||
} while (s != end);
|
} while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
|
defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
|
||||||
|
@ -6199,8 +6202,8 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc,
|
||||||
if (!pool->ptr && !poolGrow(pool))
|
if (!pool->ptr && !poolGrow(pool))
|
||||||
return NULL;
|
return NULL;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
|
const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
|
||||||
if (ptr == end)
|
if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
|
||||||
break;
|
break;
|
||||||
if (!poolGrow(pool))
|
if (!poolGrow(pool))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -6284,8 +6287,13 @@ poolGrow(STRING_POOL *pool)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (pool->blocks && pool->start == pool->blocks->s) {
|
if (pool->blocks && pool->start == pool->blocks->s) {
|
||||||
int blockSize = (int)(pool->end - pool->start)*2;
|
BLOCK *temp;
|
||||||
BLOCK *temp = (BLOCK *)
|
int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
|
||||||
|
|
||||||
|
if (blockSize < 0)
|
||||||
|
return XML_FALSE;
|
||||||
|
|
||||||
|
temp = (BLOCK *)
|
||||||
pool->mem->realloc_fcn(pool->blocks,
|
pool->mem->realloc_fcn(pool->blocks,
|
||||||
(offsetof(BLOCK, s)
|
(offsetof(BLOCK, s)
|
||||||
+ blockSize * sizeof(XML_Char)));
|
+ blockSize * sizeof(XML_Char)));
|
||||||
|
@ -6300,6 +6308,10 @@ poolGrow(STRING_POOL *pool)
|
||||||
else {
|
else {
|
||||||
BLOCK *tem;
|
BLOCK *tem;
|
||||||
int blockSize = (int)(pool->end - pool->start);
|
int blockSize = (int)(pool->end - pool->start);
|
||||||
|
|
||||||
|
if (blockSize < 0)
|
||||||
|
return XML_FALSE;
|
||||||
|
|
||||||
if (blockSize < INIT_BLOCK_SIZE)
|
if (blockSize < INIT_BLOCK_SIZE)
|
||||||
blockSize = INIT_BLOCK_SIZE;
|
blockSize = INIT_BLOCK_SIZE;
|
||||||
else
|
else
|
||||||
|
|
|
@ -195,9 +195,9 @@ prolog1(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
prolog2(PROLOG_STATE *state,
|
prolog2(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -216,9 +216,9 @@ prolog2(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
doctype0(PROLOG_STATE *state,
|
doctype0(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -264,9 +264,9 @@ doctype1(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
doctype2(PROLOG_STATE *state,
|
doctype2(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -281,9 +281,9 @@ doctype2(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
doctype3(PROLOG_STATE *state,
|
doctype3(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -298,9 +298,9 @@ doctype3(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
doctype4(PROLOG_STATE *state,
|
doctype4(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -318,9 +318,9 @@ doctype4(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
doctype5(PROLOG_STATE *state,
|
doctype5(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -437,9 +437,9 @@ externalSubset1(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
entity0(PROLOG_STATE *state,
|
entity0(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -457,9 +457,9 @@ entity0(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
entity1(PROLOG_STATE *state,
|
entity1(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -502,9 +502,9 @@ entity2(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
entity3(PROLOG_STATE *state,
|
entity3(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -519,9 +519,9 @@ entity3(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
entity4(PROLOG_STATE *state,
|
entity4(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -559,9 +559,9 @@ entity5(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
entity6(PROLOG_STATE *state,
|
entity6(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -605,9 +605,9 @@ entity7(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
entity8(PROLOG_STATE *state,
|
entity8(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -622,9 +622,9 @@ entity8(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
entity9(PROLOG_STATE *state,
|
entity9(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -639,9 +639,9 @@ entity9(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
entity10(PROLOG_STATE *state,
|
entity10(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -656,9 +656,9 @@ entity10(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
notation0(PROLOG_STATE *state,
|
notation0(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -697,9 +697,9 @@ notation1(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
notation2(PROLOG_STATE *state,
|
notation2(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -714,9 +714,9 @@ notation2(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
notation3(PROLOG_STATE *state,
|
notation3(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -732,9 +732,9 @@ notation3(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
notation4(PROLOG_STATE *state,
|
notation4(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -753,9 +753,9 @@ notation4(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
attlist0(PROLOG_STATE *state,
|
attlist0(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -771,9 +771,9 @@ attlist0(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
attlist1(PROLOG_STATE *state,
|
attlist1(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -833,9 +833,9 @@ attlist2(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
attlist3(PROLOG_STATE *state,
|
attlist3(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -852,9 +852,9 @@ attlist3(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
attlist4(PROLOG_STATE *state,
|
attlist4(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -872,9 +872,9 @@ attlist4(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
attlist5(PROLOG_STATE *state,
|
attlist5(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -889,9 +889,9 @@ attlist5(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
attlist6(PROLOG_STATE *state,
|
attlist6(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -906,9 +906,9 @@ attlist6(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
attlist7(PROLOG_STATE *state,
|
attlist7(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -967,9 +967,9 @@ attlist8(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
attlist9(PROLOG_STATE *state,
|
attlist9(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -984,9 +984,9 @@ attlist9(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
element0(PROLOG_STATE *state,
|
element0(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -1072,9 +1072,9 @@ element2(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
element3(PROLOG_STATE *state,
|
element3(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -1097,9 +1097,9 @@ element3(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
element4(PROLOG_STATE *state,
|
element4(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -1115,9 +1115,9 @@ element4(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
element5(PROLOG_STATE *state,
|
element5(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -1136,9 +1136,9 @@ element5(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
element6(PROLOG_STATE *state,
|
element6(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -1166,9 +1166,9 @@ element6(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
element7(PROLOG_STATE *state,
|
element7(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -1240,9 +1240,9 @@ condSect0(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
condSect1(PROLOG_STATE *state,
|
condSect1(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -1258,9 +1258,9 @@ condSect1(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
condSect2(PROLOG_STATE *state,
|
condSect2(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -1277,9 +1277,9 @@ condSect2(PROLOG_STATE *state,
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
declClose(PROLOG_STATE *state,
|
declClose(PROLOG_STATE *state,
|
||||||
int tok,
|
int tok,
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_PROLOG_S:
|
case XML_TOK_PROLOG_S:
|
||||||
|
@ -1292,11 +1292,11 @@ declClose(PROLOG_STATE *state,
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
error(PROLOG_STATE *state,
|
error(PROLOG_STATE *UNUSED_P(state),
|
||||||
int tok,
|
int UNUSED_P(tok),
|
||||||
const char *ptr,
|
const char *UNUSED_P(ptr),
|
||||||
const char *end,
|
const char *UNUSED_P(end),
|
||||||
const ENCODING *enc)
|
const ENCODING *UNUSED_P(enc))
|
||||||
{
|
{
|
||||||
return XML_ROLE_NONE;
|
return XML_ROLE_NONE;
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,7 +46,7 @@
|
||||||
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
|
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
|
||||||
|
|
||||||
#define UCS2_GET_NAMING(pages, hi, lo) \
|
#define UCS2_GET_NAMING(pages, hi, lo) \
|
||||||
(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
|
(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
|
||||||
|
|
||||||
/* A 2 byte UTF-8 representation splits the characters 11 bits between
|
/* A 2 byte UTF-8 representation splits the characters 11 bits between
|
||||||
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
|
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
|
||||||
|
@ -56,7 +56,7 @@
|
||||||
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
|
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
|
||||||
+ ((((byte)[0]) & 3) << 1) \
|
+ ((((byte)[0]) & 3) << 1) \
|
||||||
+ ((((byte)[1]) >> 5) & 1)] \
|
+ ((((byte)[1]) >> 5) & 1)] \
|
||||||
& (1 << (((byte)[1]) & 0x1F)))
|
& (1u << (((byte)[1]) & 0x1F)))
|
||||||
|
|
||||||
/* A 3 byte UTF-8 representation splits the characters 16 bits between
|
/* A 3 byte UTF-8 representation splits the characters 16 bits between
|
||||||
the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
|
the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
|
||||||
|
@ -69,7 +69,7 @@
|
||||||
<< 3) \
|
<< 3) \
|
||||||
+ ((((byte)[1]) & 3) << 1) \
|
+ ((((byte)[1]) & 3) << 1) \
|
||||||
+ ((((byte)[2]) >> 5) & 1)] \
|
+ ((((byte)[2]) >> 5) & 1)] \
|
||||||
& (1 << (((byte)[2]) & 0x1F)))
|
& (1u << (((byte)[2]) & 0x1F)))
|
||||||
|
|
||||||
#define UTF8_GET_NAMING(pages, p, n) \
|
#define UTF8_GET_NAMING(pages, p, n) \
|
||||||
((n) == 2 \
|
((n) == 2 \
|
||||||
|
@ -122,19 +122,19 @@
|
||||||
((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
|
((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
|
||||||
|
|
||||||
static int PTRFASTCALL
|
static int PTRFASTCALL
|
||||||
isNever(const ENCODING *enc, const char *p)
|
isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PTRFASTCALL
|
static int PTRFASTCALL
|
||||||
utf8_isName2(const ENCODING *enc, const char *p)
|
utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
|
||||||
{
|
{
|
||||||
return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
|
return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PTRFASTCALL
|
static int PTRFASTCALL
|
||||||
utf8_isName3(const ENCODING *enc, const char *p)
|
utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
|
||||||
{
|
{
|
||||||
return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
|
return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
|
||||||
}
|
}
|
||||||
|
@ -142,13 +142,13 @@ utf8_isName3(const ENCODING *enc, const char *p)
|
||||||
#define utf8_isName4 isNever
|
#define utf8_isName4 isNever
|
||||||
|
|
||||||
static int PTRFASTCALL
|
static int PTRFASTCALL
|
||||||
utf8_isNmstrt2(const ENCODING *enc, const char *p)
|
utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
|
||||||
{
|
{
|
||||||
return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
|
return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PTRFASTCALL
|
static int PTRFASTCALL
|
||||||
utf8_isNmstrt3(const ENCODING *enc, const char *p)
|
utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
|
||||||
{
|
{
|
||||||
return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
|
return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
|
||||||
}
|
}
|
||||||
|
@ -156,19 +156,19 @@ utf8_isNmstrt3(const ENCODING *enc, const char *p)
|
||||||
#define utf8_isNmstrt4 isNever
|
#define utf8_isNmstrt4 isNever
|
||||||
|
|
||||||
static int PTRFASTCALL
|
static int PTRFASTCALL
|
||||||
utf8_isInvalid2(const ENCODING *enc, const char *p)
|
utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
|
||||||
{
|
{
|
||||||
return UTF8_INVALID2((const unsigned char *)p);
|
return UTF8_INVALID2((const unsigned char *)p);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PTRFASTCALL
|
static int PTRFASTCALL
|
||||||
utf8_isInvalid3(const ENCODING *enc, const char *p)
|
utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
|
||||||
{
|
{
|
||||||
return UTF8_INVALID3((const unsigned char *)p);
|
return UTF8_INVALID3((const unsigned char *)p);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PTRFASTCALL
|
static int PTRFASTCALL
|
||||||
utf8_isInvalid4(const ENCODING *enc, const char *p)
|
utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
|
||||||
{
|
{
|
||||||
return UTF8_INVALID4((const unsigned char *)p);
|
return UTF8_INVALID4((const unsigned char *)p);
|
||||||
}
|
}
|
||||||
|
@ -329,39 +329,89 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
|
||||||
UTF8_cval4 = 0xf0
|
UTF8_cval4 = 0xf0
|
||||||
};
|
};
|
||||||
|
|
||||||
static void PTRCALL
|
void
|
||||||
utf8_toUtf8(const ENCODING *enc,
|
align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
|
||||||
|
{
|
||||||
|
const char * fromLim = *fromLimRef;
|
||||||
|
size_t walked = 0;
|
||||||
|
for (; fromLim > from; fromLim--, walked++) {
|
||||||
|
const unsigned char prev = (unsigned char)fromLim[-1];
|
||||||
|
if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
|
||||||
|
if (walked + 1 >= 4) {
|
||||||
|
fromLim += 4 - 1;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
walked = 0;
|
||||||
|
}
|
||||||
|
} else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
|
||||||
|
if (walked + 1 >= 3) {
|
||||||
|
fromLim += 3 - 1;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
walked = 0;
|
||||||
|
}
|
||||||
|
} else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
|
||||||
|
if (walked + 1 >= 2) {
|
||||||
|
fromLim += 2 - 1;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
walked = 0;
|
||||||
|
}
|
||||||
|
} else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*fromLimRef = fromLim;
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum XML_Convert_Result PTRCALL
|
||||||
|
utf8_toUtf8(const ENCODING *UNUSED_P(enc),
|
||||||
const char **fromP, const char *fromLim,
|
const char **fromP, const char *fromLim,
|
||||||
char **toP, const char *toLim)
|
char **toP, const char *toLim)
|
||||||
{
|
{
|
||||||
|
enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
|
||||||
char *to;
|
char *to;
|
||||||
const char *from;
|
const char *from;
|
||||||
if (fromLim - *fromP > toLim - *toP) {
|
if (fromLim - *fromP > toLim - *toP) {
|
||||||
/* Avoid copying partial characters. */
|
/* Avoid copying partial characters. */
|
||||||
for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
|
res = XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||||
if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
|
fromLim = *fromP + (toLim - *toP);
|
||||||
break;
|
align_limit_to_full_utf8_characters(*fromP, &fromLim);
|
||||||
}
|
}
|
||||||
for (to = *toP, from = *fromP; from != fromLim; from++, to++)
|
for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
|
||||||
*to = *from;
|
*to = *from;
|
||||||
*fromP = from;
|
*fromP = from;
|
||||||
*toP = to;
|
*toP = to;
|
||||||
|
|
||||||
|
if ((to == toLim) && (from < fromLim))
|
||||||
|
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||||
|
else
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void PTRCALL
|
static enum XML_Convert_Result PTRCALL
|
||||||
utf8_toUtf16(const ENCODING *enc,
|
utf8_toUtf16(const ENCODING *enc,
|
||||||
const char **fromP, const char *fromLim,
|
const char **fromP, const char *fromLim,
|
||||||
unsigned short **toP, const unsigned short *toLim)
|
unsigned short **toP, const unsigned short *toLim)
|
||||||
{
|
{
|
||||||
|
enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
|
||||||
unsigned short *to = *toP;
|
unsigned short *to = *toP;
|
||||||
const char *from = *fromP;
|
const char *from = *fromP;
|
||||||
while (from != fromLim && to != toLim) {
|
while (from < fromLim && to < toLim) {
|
||||||
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
|
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
|
||||||
case BT_LEAD2:
|
case BT_LEAD2:
|
||||||
|
if (fromLim - from < 2) {
|
||||||
|
res = XML_CONVERT_INPUT_INCOMPLETE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
*to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
|
*to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
|
||||||
from += 2;
|
from += 2;
|
||||||
break;
|
break;
|
||||||
case BT_LEAD3:
|
case BT_LEAD3:
|
||||||
|
if (fromLim - from < 3) {
|
||||||
|
res = XML_CONVERT_INPUT_INCOMPLETE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
*to++ = (unsigned short)(((from[0] & 0xf) << 12)
|
*to++ = (unsigned short)(((from[0] & 0xf) << 12)
|
||||||
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
|
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
|
||||||
from += 3;
|
from += 3;
|
||||||
|
@ -369,8 +419,14 @@ utf8_toUtf16(const ENCODING *enc,
|
||||||
case BT_LEAD4:
|
case BT_LEAD4:
|
||||||
{
|
{
|
||||||
unsigned long n;
|
unsigned long n;
|
||||||
if (to + 1 == toLim)
|
if (toLim - to < 2) {
|
||||||
|
res = XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||||
goto after;
|
goto after;
|
||||||
|
}
|
||||||
|
if (fromLim - from < 4) {
|
||||||
|
res = XML_CONVERT_INPUT_INCOMPLETE;
|
||||||
|
goto after;
|
||||||
|
}
|
||||||
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
|
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
|
||||||
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
|
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
|
||||||
n -= 0x10000;
|
n -= 0x10000;
|
||||||
|
@ -388,6 +444,7 @@ utf8_toUtf16(const ENCODING *enc,
|
||||||
after:
|
after:
|
||||||
*fromP = from;
|
*fromP = from;
|
||||||
*toP = to;
|
*toP = to;
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef XML_NS
|
#ifdef XML_NS
|
||||||
|
@ -436,38 +493,43 @@ static const struct normal_encoding internal_utf8_encoding = {
|
||||||
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
|
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
|
||||||
};
|
};
|
||||||
|
|
||||||
static void PTRCALL
|
static enum XML_Convert_Result PTRCALL
|
||||||
latin1_toUtf8(const ENCODING *enc,
|
latin1_toUtf8(const ENCODING *UNUSED_P(enc),
|
||||||
const char **fromP, const char *fromLim,
|
const char **fromP, const char *fromLim,
|
||||||
char **toP, const char *toLim)
|
char **toP, const char *toLim)
|
||||||
{
|
{
|
||||||
for (;;) {
|
for (;;) {
|
||||||
unsigned char c;
|
unsigned char c;
|
||||||
if (*fromP == fromLim)
|
if (*fromP == fromLim)
|
||||||
break;
|
return XML_CONVERT_COMPLETED;
|
||||||
c = (unsigned char)**fromP;
|
c = (unsigned char)**fromP;
|
||||||
if (c & 0x80) {
|
if (c & 0x80) {
|
||||||
if (toLim - *toP < 2)
|
if (toLim - *toP < 2)
|
||||||
break;
|
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||||
*(*toP)++ = (char)((c >> 6) | UTF8_cval2);
|
*(*toP)++ = (char)((c >> 6) | UTF8_cval2);
|
||||||
*(*toP)++ = (char)((c & 0x3f) | 0x80);
|
*(*toP)++ = (char)((c & 0x3f) | 0x80);
|
||||||
(*fromP)++;
|
(*fromP)++;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (*toP == toLim)
|
if (*toP == toLim)
|
||||||
break;
|
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||||
*(*toP)++ = *(*fromP)++;
|
*(*toP)++ = *(*fromP)++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void PTRCALL
|
static enum XML_Convert_Result PTRCALL
|
||||||
latin1_toUtf16(const ENCODING *enc,
|
latin1_toUtf16(const ENCODING *UNUSED_P(enc),
|
||||||
const char **fromP, const char *fromLim,
|
const char **fromP, const char *fromLim,
|
||||||
unsigned short **toP, const unsigned short *toLim)
|
unsigned short **toP, const unsigned short *toLim)
|
||||||
{
|
{
|
||||||
while (*fromP != fromLim && *toP != toLim)
|
while (*fromP < fromLim && *toP < toLim)
|
||||||
*(*toP)++ = (unsigned char)*(*fromP)++;
|
*(*toP)++ = (unsigned char)*(*fromP)++;
|
||||||
|
|
||||||
|
if ((*toP == toLim) && (*fromP < fromLim))
|
||||||
|
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||||
|
else
|
||||||
|
return XML_CONVERT_COMPLETED;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef XML_NS
|
#ifdef XML_NS
|
||||||
|
@ -494,13 +556,18 @@ static const struct normal_encoding latin1_encoding = {
|
||||||
STANDARD_VTABLE(sb_) NULL_VTABLE
|
STANDARD_VTABLE(sb_) NULL_VTABLE
|
||||||
};
|
};
|
||||||
|
|
||||||
static void PTRCALL
|
static enum XML_Convert_Result PTRCALL
|
||||||
ascii_toUtf8(const ENCODING *enc,
|
ascii_toUtf8(const ENCODING *UNUSED_P(enc),
|
||||||
const char **fromP, const char *fromLim,
|
const char **fromP, const char *fromLim,
|
||||||
char **toP, const char *toLim)
|
char **toP, const char *toLim)
|
||||||
{
|
{
|
||||||
while (*fromP != fromLim && *toP != toLim)
|
while (*fromP < fromLim && *toP < toLim)
|
||||||
*(*toP)++ = *(*fromP)++;
|
*(*toP)++ = *(*fromP)++;
|
||||||
|
|
||||||
|
if ((*toP == toLim) && (*fromP < fromLim))
|
||||||
|
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||||
|
else
|
||||||
|
return XML_CONVERT_COMPLETED;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef XML_NS
|
#ifdef XML_NS
|
||||||
|
@ -547,13 +614,14 @@ unicode_byte_type(char hi, char lo)
|
||||||
}
|
}
|
||||||
|
|
||||||
#define DEFINE_UTF16_TO_UTF8(E) \
|
#define DEFINE_UTF16_TO_UTF8(E) \
|
||||||
static void PTRCALL \
|
static enum XML_Convert_Result PTRCALL \
|
||||||
E ## toUtf8(const ENCODING *enc, \
|
E ## toUtf8(const ENCODING *UNUSED_P(enc), \
|
||||||
const char **fromP, const char *fromLim, \
|
const char **fromP, const char *fromLim, \
|
||||||
char **toP, const char *toLim) \
|
char **toP, const char *toLim) \
|
||||||
{ \
|
{ \
|
||||||
const char *from; \
|
const char *from = *fromP; \
|
||||||
for (from = *fromP; from != fromLim; from += 2) { \
|
fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
|
||||||
|
for (; from < fromLim; from += 2) { \
|
||||||
int plane; \
|
int plane; \
|
||||||
unsigned char lo2; \
|
unsigned char lo2; \
|
||||||
unsigned char lo = GET_LO(from); \
|
unsigned char lo = GET_LO(from); \
|
||||||
|
@ -563,7 +631,7 @@ E ## toUtf8(const ENCODING *enc, \
|
||||||
if (lo < 0x80) { \
|
if (lo < 0x80) { \
|
||||||
if (*toP == toLim) { \
|
if (*toP == toLim) { \
|
||||||
*fromP = from; \
|
*fromP = from; \
|
||||||
return; \
|
return XML_CONVERT_OUTPUT_EXHAUSTED; \
|
||||||
} \
|
} \
|
||||||
*(*toP)++ = lo; \
|
*(*toP)++ = lo; \
|
||||||
break; \
|
break; \
|
||||||
|
@ -573,7 +641,7 @@ E ## toUtf8(const ENCODING *enc, \
|
||||||
case 0x4: case 0x5: case 0x6: case 0x7: \
|
case 0x4: case 0x5: case 0x6: case 0x7: \
|
||||||
if (toLim - *toP < 2) { \
|
if (toLim - *toP < 2) { \
|
||||||
*fromP = from; \
|
*fromP = from; \
|
||||||
return; \
|
return XML_CONVERT_OUTPUT_EXHAUSTED; \
|
||||||
} \
|
} \
|
||||||
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
|
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
|
||||||
*(*toP)++ = ((lo & 0x3f) | 0x80); \
|
*(*toP)++ = ((lo & 0x3f) | 0x80); \
|
||||||
|
@ -581,7 +649,7 @@ E ## toUtf8(const ENCODING *enc, \
|
||||||
default: \
|
default: \
|
||||||
if (toLim - *toP < 3) { \
|
if (toLim - *toP < 3) { \
|
||||||
*fromP = from; \
|
*fromP = from; \
|
||||||
return; \
|
return XML_CONVERT_OUTPUT_EXHAUSTED; \
|
||||||
} \
|
} \
|
||||||
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
|
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
|
||||||
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \
|
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \
|
||||||
|
@ -591,7 +659,11 @@ E ## toUtf8(const ENCODING *enc, \
|
||||||
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
|
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
|
||||||
if (toLim - *toP < 4) { \
|
if (toLim - *toP < 4) { \
|
||||||
*fromP = from; \
|
*fromP = from; \
|
||||||
return; \
|
return XML_CONVERT_OUTPUT_EXHAUSTED; \
|
||||||
|
} \
|
||||||
|
if (fromLim - from < 4) { \
|
||||||
|
*fromP = from; \
|
||||||
|
return XML_CONVERT_INPUT_INCOMPLETE; \
|
||||||
} \
|
} \
|
||||||
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
|
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
|
||||||
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \
|
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \
|
||||||
|
@ -607,20 +679,32 @@ E ## toUtf8(const ENCODING *enc, \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
*fromP = from; \
|
*fromP = from; \
|
||||||
|
if (from < fromLim) \
|
||||||
|
return XML_CONVERT_INPUT_INCOMPLETE; \
|
||||||
|
else \
|
||||||
|
return XML_CONVERT_COMPLETED; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define DEFINE_UTF16_TO_UTF16(E) \
|
#define DEFINE_UTF16_TO_UTF16(E) \
|
||||||
static void PTRCALL \
|
static enum XML_Convert_Result PTRCALL \
|
||||||
E ## toUtf16(const ENCODING *enc, \
|
E ## toUtf16(const ENCODING *UNUSED_P(enc), \
|
||||||
const char **fromP, const char *fromLim, \
|
const char **fromP, const char *fromLim, \
|
||||||
unsigned short **toP, const unsigned short *toLim) \
|
unsigned short **toP, const unsigned short *toLim) \
|
||||||
{ \
|
{ \
|
||||||
|
enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
|
||||||
|
fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \
|
||||||
/* Avoid copying first half only of surrogate */ \
|
/* Avoid copying first half only of surrogate */ \
|
||||||
if (fromLim - *fromP > ((toLim - *toP) << 1) \
|
if (fromLim - *fromP > ((toLim - *toP) << 1) \
|
||||||
&& (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
|
&& (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
|
||||||
fromLim -= 2; \
|
fromLim -= 2; \
|
||||||
for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
|
res = XML_CONVERT_INPUT_INCOMPLETE; \
|
||||||
|
} \
|
||||||
|
for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
|
||||||
*(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
|
*(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
|
||||||
|
if ((*toP == toLim) && (*fromP < fromLim)) \
|
||||||
|
return XML_CONVERT_OUTPUT_EXHAUSTED; \
|
||||||
|
else \
|
||||||
|
return res; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define SET2(ptr, ch) \
|
#define SET2(ptr, ch) \
|
||||||
|
@ -949,7 +1033,7 @@ streqci(const char *s1, const char *s2)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void PTRCALL
|
static void PTRCALL
|
||||||
initUpdatePosition(const ENCODING *enc, const char *ptr,
|
initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
|
||||||
const char *end, POSITION *pos)
|
const char *end, POSITION *pos)
|
||||||
{
|
{
|
||||||
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
|
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
|
||||||
|
@ -1299,7 +1383,7 @@ unknown_isInvalid(const ENCODING *enc, const char *p)
|
||||||
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
|
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void PTRCALL
|
static enum XML_Convert_Result PTRCALL
|
||||||
unknown_toUtf8(const ENCODING *enc,
|
unknown_toUtf8(const ENCODING *enc,
|
||||||
const char **fromP, const char *fromLim,
|
const char **fromP, const char *fromLim,
|
||||||
char **toP, const char *toLim)
|
char **toP, const char *toLim)
|
||||||
|
@ -1310,21 +1394,21 @@ unknown_toUtf8(const ENCODING *enc,
|
||||||
const char *utf8;
|
const char *utf8;
|
||||||
int n;
|
int n;
|
||||||
if (*fromP == fromLim)
|
if (*fromP == fromLim)
|
||||||
break;
|
return XML_CONVERT_COMPLETED;
|
||||||
utf8 = uenc->utf8[(unsigned char)**fromP];
|
utf8 = uenc->utf8[(unsigned char)**fromP];
|
||||||
n = *utf8++;
|
n = *utf8++;
|
||||||
if (n == 0) {
|
if (n == 0) {
|
||||||
int c = uenc->convert(uenc->userData, *fromP);
|
int c = uenc->convert(uenc->userData, *fromP);
|
||||||
n = XmlUtf8Encode(c, buf);
|
n = XmlUtf8Encode(c, buf);
|
||||||
if (n > toLim - *toP)
|
if (n > toLim - *toP)
|
||||||
break;
|
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||||
utf8 = buf;
|
utf8 = buf;
|
||||||
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
|
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
|
||||||
- (BT_LEAD2 - 2));
|
- (BT_LEAD2 - 2));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (n > toLim - *toP)
|
if (n > toLim - *toP)
|
||||||
break;
|
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||||
(*fromP)++;
|
(*fromP)++;
|
||||||
}
|
}
|
||||||
do {
|
do {
|
||||||
|
@ -1333,13 +1417,13 @@ unknown_toUtf8(const ENCODING *enc,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void PTRCALL
|
static enum XML_Convert_Result PTRCALL
|
||||||
unknown_toUtf16(const ENCODING *enc,
|
unknown_toUtf16(const ENCODING *enc,
|
||||||
const char **fromP, const char *fromLim,
|
const char **fromP, const char *fromLim,
|
||||||
unsigned short **toP, const unsigned short *toLim)
|
unsigned short **toP, const unsigned short *toLim)
|
||||||
{
|
{
|
||||||
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
|
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
|
||||||
while (*fromP != fromLim && *toP != toLim) {
|
while (*fromP < fromLim && *toP < toLim) {
|
||||||
unsigned short c = uenc->utf16[(unsigned char)**fromP];
|
unsigned short c = uenc->utf16[(unsigned char)**fromP];
|
||||||
if (c == 0) {
|
if (c == 0) {
|
||||||
c = (unsigned short)
|
c = (unsigned short)
|
||||||
|
@ -1351,6 +1435,11 @@ unknown_toUtf16(const ENCODING *enc,
|
||||||
(*fromP)++;
|
(*fromP)++;
|
||||||
*(*toP)++ = c;
|
*(*toP)++ = c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((*toP == toLim) && (*fromP < fromLim))
|
||||||
|
return XML_CONVERT_OUTPUT_EXHAUSTED;
|
||||||
|
else
|
||||||
|
return XML_CONVERT_COMPLETED;
|
||||||
}
|
}
|
||||||
|
|
||||||
ENCODING *
|
ENCODING *
|
||||||
|
@ -1514,7 +1603,7 @@ initScan(const ENCODING * const *encodingTable,
|
||||||
{
|
{
|
||||||
const ENCODING **encPtr;
|
const ENCODING **encPtr;
|
||||||
|
|
||||||
if (ptr == end)
|
if (ptr >= end)
|
||||||
return XML_TOK_NONE;
|
return XML_TOK_NONE;
|
||||||
encPtr = enc->encPtr;
|
encPtr = enc->encPtr;
|
||||||
if (ptr + 1 == end) {
|
if (ptr + 1 == end) {
|
||||||
|
|
|
@ -130,6 +130,12 @@ typedef int (PTRCALL *SCANNER)(const ENCODING *,
|
||||||
const char *,
|
const char *,
|
||||||
const char **);
|
const char **);
|
||||||
|
|
||||||
|
enum XML_Convert_Result {
|
||||||
|
XML_CONVERT_COMPLETED = 0,
|
||||||
|
XML_CONVERT_INPUT_INCOMPLETE = 1,
|
||||||
|
XML_CONVERT_OUTPUT_EXHAUSTED = 2 /* and therefore potentially input remaining as well */
|
||||||
|
};
|
||||||
|
|
||||||
struct encoding {
|
struct encoding {
|
||||||
SCANNER scanners[XML_N_STATES];
|
SCANNER scanners[XML_N_STATES];
|
||||||
SCANNER literalScanners[XML_N_LITERAL_TYPES];
|
SCANNER literalScanners[XML_N_LITERAL_TYPES];
|
||||||
|
@ -158,12 +164,12 @@ struct encoding {
|
||||||
const char *ptr,
|
const char *ptr,
|
||||||
const char *end,
|
const char *end,
|
||||||
const char **badPtr);
|
const char **badPtr);
|
||||||
void (PTRCALL *utf8Convert)(const ENCODING *enc,
|
enum XML_Convert_Result (PTRCALL *utf8Convert)(const ENCODING *enc,
|
||||||
const char **fromP,
|
const char **fromP,
|
||||||
const char *fromLim,
|
const char *fromLim,
|
||||||
char **toP,
|
char **toP,
|
||||||
const char *toLim);
|
const char *toLim);
|
||||||
void (PTRCALL *utf16Convert)(const ENCODING *enc,
|
enum XML_Convert_Result (PTRCALL *utf16Convert)(const ENCODING *enc,
|
||||||
const char **fromP,
|
const char **fromP,
|
||||||
const char *fromLim,
|
const char *fromLim,
|
||||||
unsigned short **toP,
|
unsigned short **toP,
|
||||||
|
|
|
@ -87,27 +87,45 @@
|
||||||
#define PREFIX(ident) ident
|
#define PREFIX(ident) ident
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#define HAS_CHARS(enc, ptr, end, count) \
|
||||||
|
(end - ptr >= count * MINBPC(enc))
|
||||||
|
|
||||||
|
#define HAS_CHAR(enc, ptr, end) \
|
||||||
|
HAS_CHARS(enc, ptr, end, 1)
|
||||||
|
|
||||||
|
#define REQUIRE_CHARS(enc, ptr, end, count) \
|
||||||
|
{ \
|
||||||
|
if (! HAS_CHARS(enc, ptr, end, count)) { \
|
||||||
|
return XML_TOK_PARTIAL; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define REQUIRE_CHAR(enc, ptr, end) \
|
||||||
|
REQUIRE_CHARS(enc, ptr, end, 1)
|
||||||
|
|
||||||
|
|
||||||
/* ptr points to character following "<!-" */
|
/* ptr points to character following "<!-" */
|
||||||
|
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
|
PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
|
||||||
const char *end, const char **nextTokPtr)
|
const char *end, const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
if (ptr != end) {
|
if (HAS_CHAR(enc, ptr, end)) {
|
||||||
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
|
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
}
|
}
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
INVALID_CASES(ptr, nextTokPtr)
|
INVALID_CASES(ptr, nextTokPtr)
|
||||||
case BT_MINUS:
|
case BT_MINUS:
|
||||||
if ((ptr += MINBPC(enc)) == end)
|
ptr += MINBPC(enc);
|
||||||
return XML_TOK_PARTIAL;
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
|
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
|
||||||
if ((ptr += MINBPC(enc)) == end)
|
ptr += MINBPC(enc);
|
||||||
return XML_TOK_PARTIAL;
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
|
@ -131,8 +149,7 @@ static int PTRCALL
|
||||||
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
|
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
|
||||||
const char *end, const char **nextTokPtr)
|
const char *end, const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_MINUS:
|
case BT_MINUS:
|
||||||
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
||||||
|
@ -147,11 +164,10 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
}
|
}
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_PERCNT:
|
case BT_PERCNT:
|
||||||
if (ptr + MINBPC(enc) == end)
|
REQUIRE_CHARS(enc, ptr, end, 2);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
/* don't allow <!ENTITY% foo "whatever"> */
|
/* don't allow <!ENTITY% foo "whatever"> */
|
||||||
switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
|
switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
|
||||||
case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
|
case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
|
||||||
|
@ -175,7 +191,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
|
PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
|
||||||
const char *end, int *tokPtr)
|
const char *end, int *tokPtr)
|
||||||
{
|
{
|
||||||
int upper = 0;
|
int upper = 0;
|
||||||
|
@ -225,15 +241,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
|
||||||
{
|
{
|
||||||
int tok;
|
int tok;
|
||||||
const char *target = ptr;
|
const char *target = ptr;
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||||
default:
|
default:
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
}
|
}
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||||
case BT_S: case BT_CR: case BT_LF:
|
case BT_S: case BT_CR: case BT_LF:
|
||||||
|
@ -242,13 +257,12 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
}
|
}
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
INVALID_CASES(ptr, nextTokPtr)
|
INVALID_CASES(ptr, nextTokPtr)
|
||||||
case BT_QUEST:
|
case BT_QUEST:
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||||
*nextTokPtr = ptr + MINBPC(enc);
|
*nextTokPtr = ptr + MINBPC(enc);
|
||||||
return tok;
|
return tok;
|
||||||
|
@ -266,8 +280,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
}
|
}
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||||
*nextTokPtr = ptr + MINBPC(enc);
|
*nextTokPtr = ptr + MINBPC(enc);
|
||||||
return tok;
|
return tok;
|
||||||
|
@ -282,15 +295,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
|
PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
|
||||||
const char *end, const char **nextTokPtr)
|
const char *end, const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
|
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
|
||||||
ASCII_T, ASCII_A, ASCII_LSQB };
|
ASCII_T, ASCII_A, ASCII_LSQB };
|
||||||
int i;
|
int i;
|
||||||
/* CDATA[ */
|
/* CDATA[ */
|
||||||
if (end - ptr < 6 * MINBPC(enc))
|
REQUIRE_CHARS(enc, ptr, end, 6);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
|
for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
|
||||||
if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
|
if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
|
@ -305,7 +317,7 @@ static int PTRCALL
|
||||||
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
|
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
|
||||||
const char *end, const char **nextTokPtr)
|
const char *end, const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
if (ptr == end)
|
if (ptr >= end)
|
||||||
return XML_TOK_NONE;
|
return XML_TOK_NONE;
|
||||||
if (MINBPC(enc) > 1) {
|
if (MINBPC(enc) > 1) {
|
||||||
size_t n = end - ptr;
|
size_t n = end - ptr;
|
||||||
|
@ -319,13 +331,11 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_RSQB:
|
case BT_RSQB:
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
|
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
|
||||||
break;
|
break;
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||||
ptr -= MINBPC(enc);
|
ptr -= MINBPC(enc);
|
||||||
break;
|
break;
|
||||||
|
@ -334,8 +344,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
|
||||||
return XML_TOK_CDATA_SECT_CLOSE;
|
return XML_TOK_CDATA_SECT_CLOSE;
|
||||||
case BT_CR:
|
case BT_CR:
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
if (BYTE_TYPE(enc, ptr) == BT_LF)
|
if (BYTE_TYPE(enc, ptr) == BT_LF)
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
|
@ -348,7 +357,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
#define LEAD_CASE(n) \
|
#define LEAD_CASE(n) \
|
||||||
case BT_LEAD ## n: \
|
case BT_LEAD ## n: \
|
||||||
|
@ -383,19 +392,18 @@ static int PTRCALL
|
||||||
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
|
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
|
||||||
const char *end, const char **nextTokPtr)
|
const char *end, const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||||
default:
|
default:
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
}
|
}
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||||
case BT_S: case BT_CR: case BT_LF:
|
case BT_S: case BT_CR: case BT_LF:
|
||||||
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
|
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_S: case BT_CR: case BT_LF:
|
case BT_S: case BT_CR: case BT_LF:
|
||||||
break;
|
break;
|
||||||
|
@ -432,7 +440,7 @@ static int PTRCALL
|
||||||
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
|
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
|
||||||
const char *end, const char **nextTokPtr)
|
const char *end, const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
if (ptr != end) {
|
if (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_DIGIT:
|
case BT_DIGIT:
|
||||||
case BT_HEX:
|
case BT_HEX:
|
||||||
|
@ -441,7 +449,7 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
}
|
}
|
||||||
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
|
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_DIGIT:
|
case BT_DIGIT:
|
||||||
case BT_HEX:
|
case BT_HEX:
|
||||||
|
@ -464,7 +472,7 @@ static int PTRCALL
|
||||||
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
|
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
|
||||||
const char *end, const char **nextTokPtr)
|
const char *end, const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
if (ptr != end) {
|
if (HAS_CHAR(enc, ptr, end)) {
|
||||||
if (CHAR_MATCHES(enc, ptr, ASCII_x))
|
if (CHAR_MATCHES(enc, ptr, ASCII_x))
|
||||||
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
|
@ -474,7 +482,7 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
}
|
}
|
||||||
for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
|
for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_DIGIT:
|
case BT_DIGIT:
|
||||||
break;
|
break;
|
||||||
|
@ -496,8 +504,7 @@ static int PTRCALL
|
||||||
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
|
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
const char **nextTokPtr)
|
const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||||
case BT_NUM:
|
case BT_NUM:
|
||||||
|
@ -506,7 +513,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
}
|
}
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||||
case BT_SEMI:
|
case BT_SEMI:
|
||||||
|
@ -529,7 +536,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
#ifdef XML_NS
|
#ifdef XML_NS
|
||||||
int hadColon = 0;
|
int hadColon = 0;
|
||||||
#endif
|
#endif
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||||
#ifdef XML_NS
|
#ifdef XML_NS
|
||||||
|
@ -540,8 +547,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
}
|
}
|
||||||
hadColon = 1;
|
hadColon = 1;
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||||
default:
|
default:
|
||||||
|
@ -555,8 +561,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
int t;
|
int t;
|
||||||
|
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
t = BYTE_TYPE(enc, ptr);
|
t = BYTE_TYPE(enc, ptr);
|
||||||
if (t == BT_EQUALS)
|
if (t == BT_EQUALS)
|
||||||
break;
|
break;
|
||||||
|
@ -579,8 +584,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
#endif
|
#endif
|
||||||
for (;;) {
|
for (;;) {
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
open = BYTE_TYPE(enc, ptr);
|
open = BYTE_TYPE(enc, ptr);
|
||||||
if (open == BT_QUOT || open == BT_APOS)
|
if (open == BT_QUOT || open == BT_APOS)
|
||||||
break;
|
break;
|
||||||
|
@ -598,8 +602,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
/* in attribute value */
|
/* in attribute value */
|
||||||
for (;;) {
|
for (;;) {
|
||||||
int t;
|
int t;
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
t = BYTE_TYPE(enc, ptr);
|
t = BYTE_TYPE(enc, ptr);
|
||||||
if (t == open)
|
if (t == open)
|
||||||
break;
|
break;
|
||||||
|
@ -624,8 +627,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_S:
|
case BT_S:
|
||||||
case BT_CR:
|
case BT_CR:
|
||||||
|
@ -642,8 +644,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
/* ptr points to closing quote */
|
/* ptr points to closing quote */
|
||||||
for (;;) {
|
for (;;) {
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||||
case BT_S: case BT_CR: case BT_LF:
|
case BT_S: case BT_CR: case BT_LF:
|
||||||
|
@ -655,8 +656,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
case BT_SOL:
|
case BT_SOL:
|
||||||
sol:
|
sol:
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
|
@ -688,13 +688,12 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
#ifdef XML_NS
|
#ifdef XML_NS
|
||||||
int hadColon;
|
int hadColon;
|
||||||
#endif
|
#endif
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||||
case BT_EXCL:
|
case BT_EXCL:
|
||||||
if ((ptr += MINBPC(enc)) == end)
|
ptr += MINBPC(enc);
|
||||||
return XML_TOK_PARTIAL;
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_MINUS:
|
case BT_MINUS:
|
||||||
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
||||||
|
@ -716,7 +715,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
hadColon = 0;
|
hadColon = 0;
|
||||||
#endif
|
#endif
|
||||||
/* we have a start-tag */
|
/* we have a start-tag */
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||||
#ifdef XML_NS
|
#ifdef XML_NS
|
||||||
|
@ -727,8 +726,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
}
|
}
|
||||||
hadColon = 1;
|
hadColon = 1;
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||||
default:
|
default:
|
||||||
|
@ -740,7 +738,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
case BT_S: case BT_CR: case BT_LF:
|
case BT_S: case BT_CR: case BT_LF:
|
||||||
{
|
{
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||||
case BT_GT:
|
case BT_GT:
|
||||||
|
@ -765,8 +763,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
case BT_SOL:
|
case BT_SOL:
|
||||||
sol:
|
sol:
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
|
@ -785,7 +782,7 @@ static int PTRCALL
|
||||||
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
const char **nextTokPtr)
|
const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
if (ptr == end)
|
if (ptr >= end)
|
||||||
return XML_TOK_NONE;
|
return XML_TOK_NONE;
|
||||||
if (MINBPC(enc) > 1) {
|
if (MINBPC(enc) > 1) {
|
||||||
size_t n = end - ptr;
|
size_t n = end - ptr;
|
||||||
|
@ -803,7 +800,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
||||||
case BT_CR:
|
case BT_CR:
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
if (! HAS_CHAR(enc, ptr, end))
|
||||||
return XML_TOK_TRAILING_CR;
|
return XML_TOK_TRAILING_CR;
|
||||||
if (BYTE_TYPE(enc, ptr) == BT_LF)
|
if (BYTE_TYPE(enc, ptr) == BT_LF)
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
|
@ -814,12 +811,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
return XML_TOK_DATA_NEWLINE;
|
return XML_TOK_DATA_NEWLINE;
|
||||||
case BT_RSQB:
|
case BT_RSQB:
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
if (! HAS_CHAR(enc, ptr, end))
|
||||||
return XML_TOK_TRAILING_RSQB;
|
return XML_TOK_TRAILING_RSQB;
|
||||||
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
|
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
|
||||||
break;
|
break;
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
if (! HAS_CHAR(enc, ptr, end))
|
||||||
return XML_TOK_TRAILING_RSQB;
|
return XML_TOK_TRAILING_RSQB;
|
||||||
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||||
ptr -= MINBPC(enc);
|
ptr -= MINBPC(enc);
|
||||||
|
@ -832,7 +829,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
#define LEAD_CASE(n) \
|
#define LEAD_CASE(n) \
|
||||||
case BT_LEAD ## n: \
|
case BT_LEAD ## n: \
|
||||||
|
@ -845,12 +842,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
||||||
#undef LEAD_CASE
|
#undef LEAD_CASE
|
||||||
case BT_RSQB:
|
case BT_RSQB:
|
||||||
if (ptr + MINBPC(enc) != end) {
|
if (HAS_CHARS(enc, ptr, end, 2)) {
|
||||||
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
|
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ptr + 2*MINBPC(enc) != end) {
|
if (HAS_CHARS(enc, ptr, end, 3)) {
|
||||||
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
|
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
break;
|
break;
|
||||||
|
@ -884,8 +881,7 @@ static int PTRCALL
|
||||||
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
|
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
const char **nextTokPtr)
|
const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||||
case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
|
case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
|
||||||
|
@ -895,7 +891,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
}
|
}
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||||
case BT_SEMI:
|
case BT_SEMI:
|
||||||
|
@ -913,15 +909,14 @@ static int PTRCALL
|
||||||
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
|
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
const char **nextTokPtr)
|
const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
||||||
default:
|
default:
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
}
|
}
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||||
case BT_CR: case BT_LF: case BT_S:
|
case BT_CR: case BT_LF: case BT_S:
|
||||||
|
@ -941,7 +936,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
|
||||||
const char *ptr, const char *end,
|
const char *ptr, const char *end,
|
||||||
const char **nextTokPtr)
|
const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
int t = BYTE_TYPE(enc, ptr);
|
int t = BYTE_TYPE(enc, ptr);
|
||||||
switch (t) {
|
switch (t) {
|
||||||
INVALID_CASES(ptr, nextTokPtr)
|
INVALID_CASES(ptr, nextTokPtr)
|
||||||
|
@ -950,7 +945,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (t != open)
|
if (t != open)
|
||||||
break;
|
break;
|
||||||
if (ptr == end)
|
if (! HAS_CHAR(enc, ptr, end))
|
||||||
return -XML_TOK_LITERAL;
|
return -XML_TOK_LITERAL;
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
|
@ -973,7 +968,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
const char **nextTokPtr)
|
const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
int tok;
|
int tok;
|
||||||
if (ptr == end)
|
if (ptr >= end)
|
||||||
return XML_TOK_NONE;
|
return XML_TOK_NONE;
|
||||||
if (MINBPC(enc) > 1) {
|
if (MINBPC(enc) > 1) {
|
||||||
size_t n = end - ptr;
|
size_t n = end - ptr;
|
||||||
|
@ -992,8 +987,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
case BT_LT:
|
case BT_LT:
|
||||||
{
|
{
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_EXCL:
|
case BT_EXCL:
|
||||||
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
||||||
|
@ -1021,7 +1015,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
case BT_S: case BT_LF:
|
case BT_S: case BT_LF:
|
||||||
for (;;) {
|
for (;;) {
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
if (! HAS_CHAR(enc, ptr, end))
|
||||||
break;
|
break;
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_S: case BT_LF:
|
case BT_S: case BT_LF:
|
||||||
|
@ -1048,11 +1042,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
return XML_TOK_OPEN_BRACKET;
|
return XML_TOK_OPEN_BRACKET;
|
||||||
case BT_RSQB:
|
case BT_RSQB:
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
if (! HAS_CHAR(enc, ptr, end))
|
||||||
return -XML_TOK_CLOSE_BRACKET;
|
return -XML_TOK_CLOSE_BRACKET;
|
||||||
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
|
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
|
||||||
if (ptr + MINBPC(enc) == end)
|
REQUIRE_CHARS(enc, ptr, end, 2);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
|
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
|
||||||
*nextTokPtr = ptr + 2*MINBPC(enc);
|
*nextTokPtr = ptr + 2*MINBPC(enc);
|
||||||
return XML_TOK_COND_SECT_CLOSE;
|
return XML_TOK_COND_SECT_CLOSE;
|
||||||
|
@ -1065,7 +1058,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
return XML_TOK_OPEN_PAREN;
|
return XML_TOK_OPEN_PAREN;
|
||||||
case BT_RPAR:
|
case BT_RPAR:
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
if (! HAS_CHAR(enc, ptr, end))
|
||||||
return -XML_TOK_CLOSE_PAREN;
|
return -XML_TOK_CLOSE_PAREN;
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_AST:
|
case BT_AST:
|
||||||
|
@ -1141,7 +1134,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
*nextTokPtr = ptr;
|
*nextTokPtr = ptr;
|
||||||
return XML_TOK_INVALID;
|
return XML_TOK_INVALID;
|
||||||
}
|
}
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||||
case BT_GT: case BT_RPAR: case BT_COMMA:
|
case BT_GT: case BT_RPAR: case BT_COMMA:
|
||||||
|
@ -1154,8 +1147,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
switch (tok) {
|
switch (tok) {
|
||||||
case XML_TOK_NAME:
|
case XML_TOK_NAME:
|
||||||
if (ptr == end)
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
return XML_TOK_PARTIAL;
|
|
||||||
tok = XML_TOK_PREFIXED_NAME;
|
tok = XML_TOK_PREFIXED_NAME;
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
||||||
|
@ -1204,10 +1196,10 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
|
||||||
const char *end, const char **nextTokPtr)
|
const char *end, const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
const char *start;
|
const char *start;
|
||||||
if (ptr == end)
|
if (ptr >= end)
|
||||||
return XML_TOK_NONE;
|
return XML_TOK_NONE;
|
||||||
start = ptr;
|
start = ptr;
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
#define LEAD_CASE(n) \
|
#define LEAD_CASE(n) \
|
||||||
case BT_LEAD ## n: ptr += n; break;
|
case BT_LEAD ## n: ptr += n; break;
|
||||||
|
@ -1232,7 +1224,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
|
||||||
case BT_CR:
|
case BT_CR:
|
||||||
if (ptr == start) {
|
if (ptr == start) {
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
if (! HAS_CHAR(enc, ptr, end))
|
||||||
return XML_TOK_TRAILING_CR;
|
return XML_TOK_TRAILING_CR;
|
||||||
if (BYTE_TYPE(enc, ptr) == BT_LF)
|
if (BYTE_TYPE(enc, ptr) == BT_LF)
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
|
@ -1262,10 +1254,10 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
|
||||||
const char *end, const char **nextTokPtr)
|
const char *end, const char **nextTokPtr)
|
||||||
{
|
{
|
||||||
const char *start;
|
const char *start;
|
||||||
if (ptr == end)
|
if (ptr >= end)
|
||||||
return XML_TOK_NONE;
|
return XML_TOK_NONE;
|
||||||
start = ptr;
|
start = ptr;
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
#define LEAD_CASE(n) \
|
#define LEAD_CASE(n) \
|
||||||
case BT_LEAD ## n: ptr += n; break;
|
case BT_LEAD ## n: ptr += n; break;
|
||||||
|
@ -1294,7 +1286,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
|
||||||
case BT_CR:
|
case BT_CR:
|
||||||
if (ptr == start) {
|
if (ptr == start) {
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr == end)
|
if (! HAS_CHAR(enc, ptr, end))
|
||||||
return XML_TOK_TRAILING_CR;
|
return XML_TOK_TRAILING_CR;
|
||||||
if (BYTE_TYPE(enc, ptr) == BT_LF)
|
if (BYTE_TYPE(enc, ptr) == BT_LF)
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
|
@ -1326,15 +1318,15 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
|
||||||
end = ptr + n;
|
end = ptr + n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while (ptr != end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
INVALID_CASES(ptr, nextTokPtr)
|
INVALID_CASES(ptr, nextTokPtr)
|
||||||
case BT_LT:
|
case BT_LT:
|
||||||
if ((ptr += MINBPC(enc)) == end)
|
ptr += MINBPC(enc);
|
||||||
return XML_TOK_PARTIAL;
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
|
if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
|
||||||
if ((ptr += MINBPC(enc)) == end)
|
ptr += MINBPC(enc);
|
||||||
return XML_TOK_PARTIAL;
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
|
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
|
||||||
++level;
|
++level;
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
|
@ -1342,11 +1334,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case BT_RSQB:
|
case BT_RSQB:
|
||||||
if ((ptr += MINBPC(enc)) == end)
|
ptr += MINBPC(enc);
|
||||||
return XML_TOK_PARTIAL;
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
|
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
|
||||||
if ((ptr += MINBPC(enc)) == end)
|
ptr += MINBPC(enc);
|
||||||
return XML_TOK_PARTIAL;
|
REQUIRE_CHAR(enc, ptr, end);
|
||||||
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (level == 0) {
|
if (level == 0) {
|
||||||
|
@ -1373,7 +1365,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
|
||||||
{
|
{
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
end -= MINBPC(enc);
|
end -= MINBPC(enc);
|
||||||
for (; ptr != end; ptr += MINBPC(enc)) {
|
for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
case BT_DIGIT:
|
case BT_DIGIT:
|
||||||
case BT_HEX:
|
case BT_HEX:
|
||||||
|
@ -1521,7 +1513,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PTRFASTCALL
|
static int PTRFASTCALL
|
||||||
PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
|
PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
|
||||||
{
|
{
|
||||||
int result = 0;
|
int result = 0;
|
||||||
/* skip &# */
|
/* skip &# */
|
||||||
|
@ -1565,7 +1557,7 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
|
PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
|
||||||
const char *end)
|
const char *end)
|
||||||
{
|
{
|
||||||
switch ((end - ptr)/MINBPC(enc)) {
|
switch ((end - ptr)/MINBPC(enc)) {
|
||||||
|
@ -1683,11 +1675,11 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PTRCALL
|
static int PTRCALL
|
||||||
PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
|
PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
|
||||||
const char *end1, const char *ptr2)
|
const char *end1, const char *ptr2)
|
||||||
{
|
{
|
||||||
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
|
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
|
||||||
if (ptr1 == end1)
|
if (end1 - ptr1 < MINBPC(enc))
|
||||||
return 0;
|
return 0;
|
||||||
if (!CHAR_MATCHES(enc, ptr1, *ptr2))
|
if (!CHAR_MATCHES(enc, ptr1, *ptr2))
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1744,7 +1736,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
|
||||||
const char *end,
|
const char *end,
|
||||||
POSITION *pos)
|
POSITION *pos)
|
||||||
{
|
{
|
||||||
while (ptr < end) {
|
while (HAS_CHAR(enc, ptr, end)) {
|
||||||
switch (BYTE_TYPE(enc, ptr)) {
|
switch (BYTE_TYPE(enc, ptr)) {
|
||||||
#define LEAD_CASE(n) \
|
#define LEAD_CASE(n) \
|
||||||
case BT_LEAD ## n: \
|
case BT_LEAD ## n: \
|
||||||
|
@ -1760,7 +1752,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
|
||||||
case BT_CR:
|
case BT_CR:
|
||||||
pos->lineNumber++;
|
pos->lineNumber++;
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
|
if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
|
||||||
ptr += MINBPC(enc);
|
ptr += MINBPC(enc);
|
||||||
pos->columnNumber = (XML_Size)-1;
|
pos->columnNumber = (XML_Size)-1;
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in New Issue