cmake/Source/cm_codecvt.cxx

/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
   file Copyright.txt or https://cmake.org/licensing for details.  */
#include "cm_codecvt.hxx"

#if defined(_WIN32)
#  include <windows.h>

#  include <assert.h>
#  include <string.h>
#  undef max
#  include "cmsys/Encoding.hxx"
#endif

#if defined(_WIN32)
/* Number of leading ones before a zero in the byte (see cm_utf8.c).  */
extern "C" unsigned char const cm_utf8_ones[256];
#endif

// Construct a conversion facet for the requested encoding.
//
// On Windows, codecvt::ANSI enables conversion and targets the CP_ACP code
// page.  Every other encoding value -- and every value on other platforms,
// where we do not know which ANSI encoding to use -- disables conversion.
codecvt::codecvt(Encoding e)
#if defined(_WIN32)
  : m_codepage(0)
#endif
{
#if defined(_WIN32)
  if (e == codecvt::ANSI) {
    this->m_noconv = false;
    this->m_codepage = CP_ACP;
    return;
  }
#else
  // The encoding does not influence the outcome on this platform.
  static_cast<void>(e);
#endif
  // UTF8 / UTF8_WITH_BOM: the internal encoding is assumed to be UTF-8.
  // None and any unrecognized value: no encoding.
  this->m_noconv = true;
}

// Destructor: defaulted here, out of line, in the implementation file.
codecvt::~codecvt() = default;

// Report whether this facet never converts, i.e. whether the constructor
// selected the no-conversion mode.
bool codecvt::do_always_noconv() const throw()
{
  bool const passthrough = this->m_noconv;
  return passthrough;
}

// Convert the bytes in [from, from_end) into the output range [to, to_end).
//
// from_next and to_next are set to the first unconsumed input byte and the
// first unwritten output byte.  When conversion is disabled, noconv is
// returned with nothing consumed.  On Windows the input is examined one byte
// at a time: bytes that do not yet complete a codepoint are stored in the
// state, and a byte that completes one triggers the actual conversion.
// Returns error on a malformed byte sequence, otherwise ok or whatever
// non-ok result the conversion helper reports.
std::codecvt_base::result codecvt::do_out(mbstate_t& state, const char* from,
                                          const char* from_end,
                                          const char*& from_next, char* to,
                                          char* to_end, char*& to_next) const
{
  // Nothing has been consumed or produced yet.
  from_next = from;
  to_next = to;
  if (this->m_noconv) {
    return std::codecvt_base::noconv;
  }
#if defined(_WIN32)
  // Read-only overlay of the caller's state.  The state must not change
  // until a byte has really been consumed (which requires enough room in the
  // output buffer), so only the helpers below re-cast and modify it.
  State const& pending = reinterpret_cast<State&>(state);

  while (from_next != from_end) {
    // Classify the next byte by its number of leading one bits.
    unsigned char const lead_bits =
      cm_utf8_ones[static_cast<unsigned char>(*from_next)];
    bool const is_continuation = (lead_bits == 1);
    bool const mid_codepoint = (pending.buffered != 0);

    // A continuation byte needs a codepoint in progress, and a codepoint in
    // progress can only be followed by a continuation byte.
    if (is_continuation != mid_codepoint) {
      return std::codecvt_base::error;
    }

    // Work out the total number of bytes in the current codepoint.
    int total = 0;
    if (is_continuation) {
      // 10xx xxxx: reuse the size stored when earlier bytes were buffered.
      assert(pending.size != 0);
      total = pending.size;
    } else if (lead_bits == 0) {
      // 0xxx xxxx: single-byte codepoint.
      total = 1;
    } else if (lead_bits <= 4) {
      // 110x xxxx, 1110 xxxx, 1111 0xxx: the count of leading ones is the
      // codepoint size (2, 3 or 4).
      total = lead_bits;
    } else {
      // Invalid byte.
      return std::codecvt_base::error;
    }
    assert(total > 0);

    if (pending.buffered + 1 != total) {
      // More bytes are still required; stash this one and move on.
      this->BufferPartial(state, total, from_next);
      continue;
    }

    // This byte completes the codepoint; convert it now.
    std::codecvt_base::result const status =
      this->Decode(state, total, from_next, to_next, to_end);
    if (status != std::codecvt_base::ok) {
      return status;
    }
  }

  return std::codecvt_base::ok;
#else
  static_cast<void>(state);
  static_cast<void>(from);
  static_cast<void>(from_end);
  static_cast<void>(from_next);
  static_cast<void>(to);
  static_cast<void>(to_end);
  static_cast<void>(to_next);
  return std::codecvt_base::noconv;
#endif
}

// Finish the output sequence for the given conversion state.
//
// to_next is set to the first unwritten output position.  Returns noconv
// when conversion is disabled.  On Windows, if bytes of an unfinished
// codepoint are still held in the state, the result of attempting to convert
// them is returned; otherwise there is nothing to write and ok is returned.
std::codecvt_base::result codecvt::do_unshift(mbstate_t& state, char* to,
                                              char* to_end,
                                              char*& to_next) const
{
  to_next = to;
  if (this->m_noconv) {
    return std::codecvt_base::noconv;
  }
#if defined(_WIN32)
  State const& pending = reinterpret_cast<State&>(state);
  if (pending.buffered == 0) {
    // No leftover bytes to flush.
    return std::codecvt_base::ok;
  }
  return this->DecodePartial(state, to_next, to_end);
#else
  static_cast<void>(state);
  static_cast<void>(to_end);
  return std::codecvt_base::ok;
#endif
}

#if defined(_WIN32)
// Convert one complete UTF-8 codepoint to the target code page.
//
// The codepoint consists of the bytes already buffered in the state followed
// by the byte at *from_next; `size` is its total length in bytes.
//
// Returns:
//   ok      - the codepoint was written at to_next; from_next and to_next
//             were advanced and the state was reset for the next codepoint.
//   partial - the output range [to_next, to_end) is too small.  Nothing is
//             consumed and the state is left untouched so the caller can
//             retry with more output space.
//   error   - the bytes are not valid UTF-8 or the conversion failed.
std::codecvt_base::result codecvt::Decode(mbstate_t& state, int size,
                                          const char*& from_next,
                                          char*& to_next, char* to_end) const
{
  State& lstate = reinterpret_cast<State&>(state);

  // Collect all the bytes for this codepoint.
  char buf[4];
  memcpy(buf, lstate.partial, lstate.buffered);
  buf[lstate.buffered] = *from_next;

  // Convert the encoding.  Two UTF-16 units are enough for one codepoint.
  wchar_t wbuf[2];
  int wlen =
    MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, buf, size, wbuf, 2);
  if (wlen <= 0) {
    return std::codecvt_base::error;
  }

  // With no room left we must not call WideCharToMultiByte at all: passing
  // an output size of 0 makes it report the required size (a positive
  // value) without writing anything, which would be mistaken for success
  // and push to_next beyond to_end.
  if (to_next >= to_end) {
    return std::codecvt_base::partial;
  }

  int tlen =
    WideCharToMultiByte(m_codepage, 0, wbuf, wlen, to_next,
                        static_cast<int>(to_end - to_next), nullptr, nullptr);
  if (tlen <= 0) {
    if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
      return std::codecvt_base::partial;
    }
    return std::codecvt_base::error;
  }

  // Move past the now-consumed byte in the input buffer.
  ++from_next;

  // Move past the converted codepoint in the output buffer.
  to_next += tlen;

  // Re-initialize the state for the next codepoint to start.
  lstate = State();

  return std::codecvt_base::ok;
}

// Try to convert the bytes currently buffered in the state, without any
// further input, to the target code page.
//
// Returns:
//   ok      - the bytes were converted and written at to_next; to_next was
//             advanced and the state was reset.
//   partial - the output range [to_next, to_end) is too small; the state is
//             left untouched.
//   error   - the buffered bytes are not valid UTF-8 on their own, or the
//             conversion failed.
std::codecvt_base::result codecvt::DecodePartial(mbstate_t& state,
                                                 char*& to_next,
                                                 char* to_end) const
{
  State& lstate = reinterpret_cast<State&>(state);

  // Try converting the partial codepoint.
  wchar_t wbuf[2];
  int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, lstate.partial,
                                 lstate.buffered, wbuf, 2);
  if (wlen <= 0) {
    return std::codecvt_base::error;
  }

  // With no room left we must not call WideCharToMultiByte at all: passing
  // an output size of 0 makes it report the required size (a positive
  // value) without writing anything, which would be mistaken for success
  // and push to_next beyond to_end.
  if (to_next >= to_end) {
    return std::codecvt_base::partial;
  }

  int tlen =
    WideCharToMultiByte(m_codepage, 0, wbuf, wlen, to_next,
                        static_cast<int>(to_end - to_next), nullptr, nullptr);
  if (tlen <= 0) {
    if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
      return std::codecvt_base::partial;
    }
    return std::codecvt_base::error;
  }

  // Move past the converted codepoint in the output buffer.
  to_next += tlen;

  // Re-initialize the state for the next codepoint to start.
  lstate = State();

  return std::codecvt_base::ok;
}

void codecvt::BufferPartial(mbstate_t& state, int size,
                            const char*& from_next) const
{
  State& lstate = reinterpret_cast<State&>(state);

  // Save the byte in our buffer for later.
  lstate.partial[lstate.buffered++] = *from_next;
  lstate.size = size;

  // Move past the now-consumed byte in the input buffer.
  ++from_next;
}
#endif

// Report the maximum length value for this facet, which is always 4.
// NOTE(review): presumably this mirrors the longest UTF-8 sequence (4 bytes)
// that do_out accepts -- confirm.
int codecvt::do_max_length() const throw()
{
  int const max_length = 4;
  return max_length;
}

// Report the encoding width for this facet, which is always 0.  In the
// std::codecvt interface, 0 denotes a variable-width external encoding.
int codecvt::do_encoding() const throw()
{
  int const variable_width = 0;
  return variable_width;
}
New upstream version 3.8.0 8 years ago			`/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying`
			`file Copyright.txt or https://cmake.org/licensing for details. */`
			`#include "cm_codecvt.hxx"`

			`#if defined(_WIN32)`
New upstream version 3.16.3 5 years ago			`# include <windows.h>`

New upstream version 3.12.1 6 years ago			`# include <assert.h>`
			`# include <string.h>`
			`# undef max`
			`# include "cmsys/Encoding.hxx"`
New upstream version 3.9.0 8 years ago			`#endif`

			`#if defined(_WIN32)`
			`/* Number of leading ones before a zero in the byte (see cm_utf8.c). */`
			`extern "C" unsigned char const cm_utf8_ones[256];`
New upstream version 3.8.0 8 years ago			`#endif`

			`codecvt::codecvt(Encoding e)`
			`#if defined(_WIN32)`
New upstream version 3.9.0 8 years ago			`: m_codepage(0)`
New upstream version 3.8.0 8 years ago			`#endif`
			`{`
			`switch (e) {`
			`case codecvt::ANSI:`
			`#if defined(_WIN32)`
			`m_noconv = false;`
			`m_codepage = CP_ACP;`
			`break;`
			`#endif`
			`// We don't know which ANSI encoding to use for other platforms than`
			`// Windows so we don't do any conversion there`
			`case codecvt::UTF8:`
New upstream version 3.21.2 3 years ago			`case codecvt::UTF8_WITH_BOM:`
New upstream version 3.8.0 8 years ago			`// Assume internal encoding is UTF-8`
			`case codecvt::None:`
			`// No encoding`
			`default:`
New upstream version 3.21.2 3 years ago			`this->m_noconv = true;`
New upstream version 3.8.0 8 years ago			`}`
			`}`

New upstream version 3.15.4 5 years ago			`codecvt::~codecvt() = default;`
New upstream version 3.8.0 8 years ago
			`bool codecvt::do_always_noconv() const throw()`
			`{`
New upstream version 3.21.2 3 years ago			`return this->m_noconv;`
New upstream version 3.11.1 7 years ago			`}`
New upstream version 3.8.0 8 years ago
			`std::codecvt_base::result codecvt::do_out(mbstate_t& state, const char* from,`
			`const char* from_end,`
			`const char*& from_next, char* to,`
			`char* to_end, char*& to_next) const`
			`{`
New upstream version 3.9.0 8 years ago			`from_next = from;`
			`to_next = to;`
New upstream version 3.21.2 3 years ago			`if (this->m_noconv) {`
New upstream version 3.9.0 8 years ago			`return std::codecvt_base::noconv;`
New upstream version 3.8.0 8 years ago			`}`
			`#if defined(_WIN32)`
New upstream version 3.9.0 8 years ago			`// Use a const view of the state because we should not modify it until we`
			`// have fully processed and consume a byte (with sufficient space in the`
			`// output buffer). We call helpers to re-cast and modify the state`
			`State const& lstate = reinterpret_cast<State&>(state);`

			`while (from_next != from_end) {`
			`// Count leading ones in the bits of the next byte.`
			`unsigned char const ones =`
			`cm_utf8_ones[static_cast<unsigned char>(*from_next)];`

			`if (ones != 1 && lstate.buffered != 0) {`
			`// We have a buffered partial codepoint that we never completed.`
			`return std::codecvt_base::error;`
			`} else if (ones == 1 && lstate.buffered == 0) {`
			`// This is a continuation of a codepoint that never started.`
			`return std::codecvt_base::error;`
			`}`

			`// Compute the number of bytes in the current codepoint.`
			`int need = 0;`
			`switch (ones) {`
			`case 0: // 0xxx xxxx: new codepoint of size 1`
			`need = 1;`
			`break;`
			`case 1: // 10xx xxxx: continues a codepoint`
			`assert(lstate.size != 0);`
			`need = lstate.size;`
			`break;`
			`case 2: // 110x xxxx: new codepoint of size 2`
			`need = 2;`
			`break;`
			`case 3: // 1110 xxxx: new codepoint of size 3`
			`need = 3;`
			`break;`
			`case 4: // 1111 0xxx: new codepoint of size 4`
			`need = 4;`
			`break;`
			`default: // invalid byte`
			`return std::codecvt_base::error;`
New upstream version 3.8.0 8 years ago			`}`
New upstream version 3.9.0 8 years ago			`assert(need > 0);`

			`if (lstate.buffered + 1 == need) {`
			`// This byte completes a codepoint.`
			`std::codecvt_base::result decode_result =`
			`this->Decode(state, need, from_next, to_next, to_end);`
			`if (decode_result != std::codecvt_base::ok) {`
			`return decode_result;`
New upstream version 3.8.0 8 years ago			`}`
			`} else {`
New upstream version 3.9.0 8 years ago			`// This byte does not complete a codepoint.`
			`this->BufferPartial(state, need, from_next);`
New upstream version 3.8.0 8 years ago			`}`
			`}`
New upstream version 3.9.0 8 years ago
			`return std::codecvt_base::ok;`
New upstream version 3.8.0 8 years ago			`#else`
			`static_cast<void>(state);`
			`static_cast<void>(from);`
			`static_cast<void>(from_end);`
			`static_cast<void>(from_next);`
			`static_cast<void>(to);`
			`static_cast<void>(to_end);`
			`static_cast<void>(to_next);`
New upstream version 3.9.0 8 years ago			`return std::codecvt_base::noconv;`
New upstream version 3.8.0 8 years ago			`#endif`
New upstream version 3.11.1 7 years ago			`}`
New upstream version 3.8.0 8 years ago
			`std::codecvt_base::result codecvt::do_unshift(mbstate_t& state, char* to,`
			`char* to_end,`
			`char*& to_next) const`
			`{`
			`to_next = to;`
New upstream version 3.21.2 3 years ago			`if (this->m_noconv) {`
New upstream version 3.9.0 8 years ago			`return std::codecvt_base::noconv;`
			`}`
New upstream version 3.8.0 8 years ago			`#if defined(_WIN32)`
New upstream version 3.9.0 8 years ago			`State& lstate = reinterpret_cast<State&>(state);`
			`if (lstate.buffered != 0) {`
			`return this->DecodePartial(state, to_next, to_end);`
New upstream version 3.8.0 8 years ago			`}`
New upstream version 3.9.0 8 years ago			`return std::codecvt_base::ok;`
New upstream version 3.8.0 8 years ago			`#else`
			`static_cast<void>(state);`
			`static_cast<void>(to_end);`
New upstream version 3.9.0 8 years ago			`return std::codecvt_base::ok;`
New upstream version 3.8.0 8 years ago			`#endif`
New upstream version 3.11.1 7 years ago			`}`
New upstream version 3.8.0 8 years ago
New upstream version 3.9.0 8 years ago			`#if defined(_WIN32)`
			`std::codecvt_base::result codecvt::Decode(mbstate_t& state, int size,`
			`const char*& from_next,`
			`char*& to_next, char* to_end) const`
			`{`
			`State& lstate = reinterpret_cast<State&>(state);`

			`// Collect all the bytes for this codepoint.`
			`char buf[4];`
			`memcpy(buf, lstate.partial, lstate.buffered);`
			`buf[lstate.buffered] = *from_next;`

			`// Convert the encoding.`
			`wchar_t wbuf[2];`
			`int wlen =`
			`MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, buf, size, wbuf, 2);`
			`if (wlen <= 0) {`
			`return std::codecvt_base::error;`
			`}`

			`int tlen = WideCharToMultiByte(m_codepage, 0, wbuf, wlen, to_next,`
			`to_end - to_next, NULL, NULL);`
			`if (tlen <= 0) {`
			`if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {`
			`return std::codecvt_base::partial;`
			`}`
			`return std::codecvt_base::error;`
			`}`

			`// Move past the now-consumed byte in the input buffer.`
			`++from_next;`

			`// Move past the converted codepoint in the output buffer.`
			`to_next += tlen;`

			`// Re-initialize the state for the next codepoint to start.`
			`lstate = State();`

			`return std::codecvt_base::ok;`
			`}`

			`std::codecvt_base::result codecvt::DecodePartial(mbstate_t& state,`
			`char*& to_next,`
			`char* to_end) const`
			`{`
			`State& lstate = reinterpret_cast<State&>(state);`

			`// Try converting the partial codepoint.`
			`wchar_t wbuf[2];`
			`int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, lstate.partial,`
			`lstate.buffered, wbuf, 2);`
			`if (wlen <= 0) {`
			`return std::codecvt_base::error;`
			`}`

			`int tlen = WideCharToMultiByte(m_codepage, 0, wbuf, wlen, to_next,`
			`to_end - to_next, NULL, NULL);`
			`if (tlen <= 0) {`
			`if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {`
			`return std::codecvt_base::partial;`
			`}`
			`return std::codecvt_base::error;`
			`}`

			`// Move past the converted codepoint in the output buffer.`
			`to_next += tlen;`

			`// Re-initialize the state for the next codepoint to start.`
			`lstate = State();`

			`return std::codecvt_base::ok;`
			`}`

			`void codecvt::BufferPartial(mbstate_t& state, int size,`
			`const char*& from_next) const`
			`{`
			`State& lstate = reinterpret_cast<State&>(state);`

			`// Save the byte in our buffer for later.`
			`lstate.partial[lstate.buffered++] = *from_next;`
			`lstate.size = size;`

			`// Move past the now-consumed byte in the input buffer.`
			`++from_next;`
			`}`
			`#endif`

New upstream version 3.8.0 8 years ago			`int codecvt::do_max_length() const throw()`
			`{`
			`return 4;`
New upstream version 3.11.1 7 years ago			`}`
New upstream version 3.8.0 8 years ago
			`int codecvt::do_encoding() const throw()`
			`{`
			`return 0;`
New upstream version 3.11.1 7 years ago			`}`