CMake/Tests/CMakeLib/testUTF8.cxx

/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
   file Copyright.txt or https://cmake.org/licensing for details.  */
#include <cm_utf8.h>
#include <stdio.h>

/* Fixed-size buffer for one test sequence: up to four encoded bytes plus one
   extra byte, so a four-byte string literal still keeps its terminating NUL
   and the buffer can be printed with "%s".  */
typedef char test_utf8_char[5];

/* Print the first four bytes of `c` to stdout as a bracketed list of
   two-digit uppercase hexadecimal values, e.g. "[0xC2,0xA9,0x00,0x00]".
   No trailing newline is written.  */
static void test_utf8_char_print(test_utf8_char const c)
{
  /* Read the bytes as unsigned so values >= 0x80 are not sign-extended.  */
  unsigned char const* d = reinterpret_cast<unsigned char const*>(c);
  /* "%X" consumes an unsigned int, so pass exactly that type.  */
  printf("[0x%02X,0x%02X,0x%02X,0x%02X]", static_cast<unsigned int>(d[0]),
         static_cast<unsigned int>(d[1]), static_cast<unsigned int>(d[2]),
         static_cast<unsigned int>(d[3]));
}

/* One known-good decoding case: an encoded sequence together with the
   results the decoder is expected to produce for it.  */
struct test_utf8_entry
{
  /* Number of bytes the decoder should consume; 0 marks the table end.  */
  int n;
  /* Encoded input bytes, zero-padded to four bytes.  */
  test_utf8_char str;
  /* Expected decoded value.  */
  unsigned int chr;
};

/* Well-formed sequences of one to four bytes, each paired with the byte
   count and value the decoder must report.  The final entry has n == 0 and
   terminates the table for the loop in testUTF8.  */
static test_utf8_entry const good_entry[] = {
  { 1, "\x20\x00\x00\x00", 0x0020 },  /* Space.  */
  { 2, "\xC2\xA9\x00\x00", 0x00A9 },  /* Copyright.  */
  { 3, "\xE2\x80\x98\x00", 0x2018 },  /* Open-single-quote.  */
  { 3, "\xE2\x80\x99\x00", 0x2019 },  /* Close-single-quote.  */
  { 4, "\xF0\xA3\x8E\xB4", 0x233B4 }, /* Example from RFC 3629.  */
  { 0, { 0, 0, 0, 0, 0 }, 0 }
};

/* Malformed sequences the decoder must reject, in order: a lone continuation
   byte (0x80); a 0xC0 lead byte followed by NUL; a three-byte lead (0xE0)
   followed by NUL; and overlong three- and four-byte forms (E0 80 80 and
   F0 80 80 80).  The all-zero final entry terminates the table: the loop in
   testUTF8 stops at the first entry whose first byte is 0.  */
static test_utf8_char const bad_chars[] = {
  "\x80\x00\x00\x00", "\xC0\x00\x00\x00", "\xE0\x00\x00\x00",
  "\xE0\x80\x80\x00", "\xF0\x80\x80\x80", { 0, 0, 0, 0, 0 }
};

/* Write the leading part of a result line for a sequence that is expected to
   decode: the pass/FAIL verdict, a hex dump of the bytes, and the bytes
   themselves printed as a string.  The caller finishes the line.  */
static void report_good(bool passed, test_utf8_char const c)
{
  char const* verdict = "FAIL";
  if (passed) {
    verdict = "pass";
  }
  printf("%s: decoding good ", verdict);
  test_utf8_char_print(c);
  printf(" (%s) ", c);
}

/* Write the leading part of a result line for a sequence that is expected to
   be rejected: the pass/FAIL verdict followed by a hex dump of the bytes.
   The raw bytes are not printed as text.  The caller finishes the line.  */
static void report_bad(bool passed, test_utf8_char const c)
{
  char const* verdict = "FAIL";
  if (passed) {
    verdict = "pass";
  }
  printf("%s: decoding bad  ", verdict);
  test_utf8_char_print(c);
  printf(" ");
}

/* Decode the sequence stored in `entry` and check both the decoded value and
   the number of bytes consumed against the expectations in `entry`.  Prints
   one result line.  Returns true only if the decoder succeeds and both
   checks match.  */
static bool decode_good(test_utf8_entry const entry)
{
  /* Start from the same sentinel decode_bad uses, so a decoder that reports
     success without storing a value yields a deterministic mismatch instead
     of a read of an indeterminate value.  No good_entry expects 0xFFFF.  */
  unsigned int uc = 0xFFFFu;
  const char* const begin = entry.str;
  const char* const next = cm_utf8_decode_character(begin, begin + 4, &uc);

  /* The decoder rejected a sequence that should have been accepted.  */
  if (!next) {
    report_good(false, begin);
    printf("failed\n");
    return false;
  }

  /* The decoded value is checked before the consumed length.  */
  if (uc != entry.chr) {
    report_good(false, begin);
    printf("expected 0x%04X, got 0x%04X\n", entry.chr, uc);
    return false;
  }

  int const used = static_cast<int>(next - begin);
  if (used != entry.n) {
    report_good(false, begin);
    printf("had %d bytes, used %d\n", entry.n, used);
    return false;
  }

  report_good(true, begin);
  printf("got 0x%04X\n", uc);
  return true;
}

/* Feed a malformed sequence to the decoder and confirm it is rejected.
   Prints one result line.  Returns true when the decoder reports failure,
   and false when it unexpectedly produces a value.  */
static bool decode_bad(test_utf8_char const s)
{
  /* Sentinel shown in the diagnostic if the decoder wrongly succeeds
     without storing a value.  */
  unsigned int decoded = 0xFFFFu;
  const char* const next = cm_utf8_decode_character(s, s + 4, &decoded);

  if (!next) {
    report_bad(true, s);
    printf("failed as expected\n");
    return true;
  }

  report_bad(false, s);
  printf("expected failure, got 0x%04X\n", decoded);
  return false;
}

int testUTF8(int /*unused*/, char* /*unused*/ [])
{
  int result = 0;
  for (test_utf8_entry const* e = good_entry; e->n; ++e) {
    if (!decode_good(*e)) {
      result = 1;
    }
  }
  for (test_utf8_char const* c = bad_chars; (*c)[0]; ++c) {
    if (!decode_bad(*c)) {
      result = 1;
    }
  }
  return result;
}
Simplify CMake per-source license notices Per-source copyright/license notice headers that spell out copyright holder names and years are hard to maintain and often out-of-date or plain wrong. Precise contributor information is already maintained automatically by the version control tool. Ultimately it is the receiver of a file who is responsible for determining its licensing status, and per-source notices are merely a convenience. Therefore it is simpler and more accurate for each source to have a generic notice of the license name and references to more detailed information on copyright holders and full license terms. Our `Copyright.txt` file now contains a list of Contributors whose names appeared in source-level copyright notices. It also references version control history for more precise information. Therefore we no longer need to spell out the list of Contributors in each source file notice. Replace CMake per-source copyright/license notice headers with a short description of the license and links to `Copyright.txt` and online information available from "https://cmake.org/licensing". The online URL also handles cases of modules being copied out of our source into other projects, so we can drop our notices about replacing links with full license text. Run the `Utilities/Scripts/filter-notices.bash` script to perform the majority of the replacements mechanically. Manually fix up shebang lines and trailing newlines in a few files. Manually update the notices in a few files that the script does not handle. 2016-09-27 22:01:08 +03:00			`/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying`
			`file Copyright.txt or https://cmake.org/licensing for details. */`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`#include <cm_utf8.h>`
			`#include <stdio.h>`

			`typedef char test_utf8_char[5];`

			`static void test_utf8_char_print(test_utf8_char const c)`
			`{`
			`unsigned char const* d = reinterpret_cast<unsigned char const*>(c);`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`printf("[0x%02X,0x%02X,0x%02X,0x%02X]", (int)d[0], (int)d[1], (int)d[2],`
			`(int)d[3]);`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`}`

			`struct test_utf8_entry`
			`{`
			`int n;`
			`test_utf8_char str;`
			`unsigned int chr;`
			`};`

			`static test_utf8_entry const good_entry[] = {`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`{ 1, "\x20\x00\x00\x00", 0x0020 }, /* Space. */`
			`{ 2, "\xC2\xA9\x00\x00", 0x00A9 }, /* Copyright. */`
			`{ 3, "\xE2\x80\x98\x00", 0x2018 }, /* Open-single-quote. */`
			`{ 3, "\xE2\x80\x99\x00", 0x2019 }, /* Close-single-quote. */`
			`{ 4, "\xF0\xA3\x8E\xB4", 0x233B4 }, /* Example from RFC 3629. */`
			`{ 0, { 0, 0, 0, 0, 0 }, 0 }`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`};`

			`static test_utf8_char const bad_chars[] = {`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`"\x80\x00\x00\x00", "\xC0\x00\x00\x00", "\xE0\x00\x00\x00",`
			`"\xE0\x80\x80\x00", "\xF0\x80\x80\x80", { 0, 0, 0, 0, 0 }`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`};`

			`static void report_good(bool passed, test_utf8_char const c)`
			`{`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`printf("%s: decoding good ", passed ? "pass" : "FAIL");`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`test_utf8_char_print(c);`
			`printf(" (%s) ", c);`
			`}`

			`static void report_bad(bool passed, test_utf8_char const c)`
			`{`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`printf("%s: decoding bad ", passed ? "pass" : "FAIL");`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`test_utf8_char_print(c);`
			`printf(" ");`
			`}`

			`static bool decode_good(test_utf8_entry const entry)`
			`{`
			`unsigned int uc;`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`if (const char* e =`
			`cm_utf8_decode_character(entry.str, entry.str + 4, &uc)) {`
			`int used = static_cast<int>(e - entry.str);`
			`if (uc != entry.chr) {`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`report_good(false, entry.str);`
			`printf("expected 0x%04X, got 0x%04X\n", entry.chr, uc);`
			`return false;`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`}`
			`if (used != entry.n) {`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`report_good(false, entry.str);`
			`printf("had %d bytes, used %d\n", entry.n, used);`
			`return false;`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`}`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`report_good(true, entry.str);`
			`printf("got 0x%04X\n", uc);`
			`return true;`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`}`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`report_good(false, entry.str);`
			`printf("failed\n");`
			`return false;`
			`}`

			`static bool decode_bad(test_utf8_char const s)`
			`{`
			`unsigned int uc = 0xFFFFu;`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`const char* e = cm_utf8_decode_character(s, s + 4, &uc);`
			`if (e) {`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`report_bad(false, s);`
			`printf("expected failure, got 0x%04X\n", uc);`
			`return false;`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`}`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`report_bad(true, s);`
			`printf("failed as expected\n");`
			`return true;`
			`}`

Make sure unused parameters are /*named*/ 2016-08-17 02:49:57 +03:00			`int testUTF8(int /*unused*/, char* /*unused*/ [])`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`{`
			`int result = 0;`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`for (test_utf8_entry const* e = good_entry; e->n; ++e) {`
			`if (!decode_good(*e)) {`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`result = 1;`
			`}`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`}`
			`for (test_utf8_char const* c = bad_chars; (*c)[0]; ++c) {`
			`if (!decode_bad(*c)) {`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`result = 1;`
			`}`
Revise C++ coding style using clang-format Run the `Utilities/Scripts/clang-format.bash` script to update all our C++ code to a new style defined by `.clang-format`. Use `clang-format` version 3.8. * If you reached this commit for a line in `git blame`, re-run the blame operation starting at the parent of this commit to see older history for the content. * See the parent commit for instructions to rebase a change across this style transition commit. 2016-05-16 17:34:04 +03:00			`}`
Test UTF-8 decoding This creates a unit test for cm_utf8. See issue #10003. 2009-12-08 23:44:22 +03:00			`return result;`
			`}`