cmake/Utilities/cmxmlrpc/xmlrpc_utf8.c

/* Copyright (C) 2001 by Eric Kidd. All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions
** are met:
** 1. Redistributions of source code must retain the above copyright
**    notice, this list of conditions and the following disclaimer.
** 2. Redistributions in binary form must reproduce the above copyright
**    notice, this list of conditions and the following disclaimer in the
**    documentation and/or other materials provided with the distribution.
** 3. The name of the author may not be used to endorse or promote products
**    derived from this software without specific prior written permission. 
**  
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
** ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
** FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
** DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
** OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
** HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
** LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
** OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
** SUCH DAMAGE. */


/*=========================================================================
**  XML-RPC UTF-8 Utilities
**=========================================================================
**  Routines for validating, encoding and decoding UTF-8 data.  We try to
**  be very, very strict about invalid UTF-8 data.
**
**  All of the code in this file assumes that your machine represents
**  wchar_t as a 16-bit (or wider) character containing UCS-2 data.  If this
**  assumption is incorrect, you may need to replace this file.
**
**  For lots of information on Unicode and UTF-8 decoding, see:
**    http://www.cl.cam.ac.uk/~mgk25/unicode.html
*/

#include "xmlrpc_config.h"

#include "xmlrpc.h"

#ifdef HAVE_UNICODE_WCHAR

/*=========================================================================
**  Tables and Constants
**=========================================================================
**  We use a variety of tables and constants to help decode and validate
**  UTF-8 data.
*/

/* Lookup table: the total number of bytes in a UTF-8 sequence whose first
** byte is the array index.  A zero entry indicates an illegal initial byte:
** 0x80-0xBF (bytes matching the continuation pattern 10xxxxxx) and
** 0xFE-0xFF.  The entries of 4, 5 and 6 belong to lead bytes of sequences
** which decode_utf8 recognizes but rejects as unsupported UCS-4 characters.
**
** The table is only ever read, so it is declared const.
**
** This table was generated using a Perl script and information from the
** UTF-8 standard.
**
** Fredrik Lundh's UTF-8 decoder Python 2.0 uses a similar table.  But
** since Python 2.0 has the icky CNRI license, I regenerated this
** table from scratch and wrote my own decoder. */
static const unsigned char utf8_seq_length[256] = {
    /* 0x00-0x7F: single-byte (ASCII) characters. */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80-0xBF: continuation bytes, never legal as an initial byte. */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0-0xDF: two-byte sequences. */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xE0-0xEF: three-byte sequences. */
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xF0-0xFD: four- to six-byte sequences; 0xFE-0xFF: illegal. */
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
};

/* The minimum legal character value for a UTF-8 sequence of the given
** length (the array index is the sequence length in bytes).  We have to
** check this to avoid accepting "overlong" UTF-8 sequences, which use more
** bytes than necessary to encode a given character.  Such sequences are
** commonly used by evil people to bypass filters and security checks.
** This table is based on the UTF-8-test.txt file by Markus Kuhn
** <mkuhn@acm.org>.
**
** The table is only ever read, so it is declared const.  Only indexes 2
** and 3 are meaningful lookups; decode_utf8 handles one-byte characters
** separately and rejects longer sequences before consulting this table. */
static const wchar_t utf8_min_char_for_length[4] = {
    0,          /* Length 0: Not used (meaningless) */
    0x0000,     /* Length 1: Not used (special-cased) */
    0x0080,     /* Length 2 */
    0x0800      /* Length 3 */

#if 0
    /* These are only useful on systems where wchar_t is 32-bits wide
    ** and supports full UCS-4.  Note that enabling this section as-is
    ** will not compile: the length-3 entry above needs a trailing comma
    ** and the array must grow from 4 to 7 elements. */
    0x00010000, /* Length 4 */
    0x00200000, /* Length 5 */
    0x04000000  /* Length 6 */
#endif
};

/* This is the maximum legal 16-bit (UCS-2) character.  Decoded values
** above this limit (i.e. 0xFFFE and 0xFFFF) are rejected by decode_utf8.
** Again, this information is based on UTF-8-test.txt. */
#define UCS2_MAX_LEGAL_CHARACTER (0xFFFD)

/* First and last UTF-16 surrogate characters.  These are *not* legal UCS-2
** characters--they're used to code for UCS-4 characters when using
** UTF-16.  They should never appear in decoded UTF-8 data!  Again, these
** could hypothetically be used to bypass security measures on some machines.
** Based on UTF-8-test.txt. */
#define UTF16_FIRST_SURROGATE (0xD800)
#define UTF16_LAST_SURROGATE  (0xDFFF)

/* Is the character 'c' a UTF-8 continuation character, i.e. does it have
** the bit pattern 10xxxxxx?  The mask keeps only the top two bits of the
** low byte, so the test gives the same answer whether 'c' is a signed or
** an unsigned char. */
#define IS_CONTINUATION(c) (((c) & 0xC0) == 0x80)

/* Maximum number of bytes needed to encode a supported character.  Used
** by xmlrpc_wcs_to_utf8 to size its output buffer. */
#define MAX_ENCODED_BYTES (3)


/*=========================================================================
**  decode_utf8
**=========================================================================
**  Internal routine which decodes (or validates) a UTF-8 string.
**
**  Parameters:
**    env          - Fault environment, checked with XMLRPC_ASSERT_ENV_OK
**                   on entry.  Every decoding problem is reported through
**                   it as XMLRPC_INVALID_UTF8_ERROR.
**    utf8_data    - The bytes to decode.  Exactly utf8_len bytes are
**                   examined; no NUL terminator is required.
**    utf8_len     - Number of bytes in utf8_data.
**    io_buff      - NULL to validate only.  Otherwise a caller-allocated
**                   buffer which receives one wchar_t per decoded
**                   character.  No bounds checking is done here; a buffer
**                   of utf8_len elements is always sufficient because
**                   every character consumes at least one input byte.
**    out_buff_len - Must be NULL exactly when io_buff is NULL.  Otherwise
**                   receives the number of characters written, or 0 if a
**                   fault occurred (io_buff may then hold partial data).
**
**  Only one-, two- and three-byte sequences are decoded.  Four- to
**  six-byte sequences, overlong encodings, UTF-16 surrogates and values
**  above U+FFFD are all rejected.
**
**  We assume that wchar_t holds a single UCS-2 character in native-endian
**  byte ordering.
*/

static void 
decode_utf8(xmlrpc_env * const env,
            const char * const utf8_data,
            size_t       const utf8_len,
            wchar_t *    const io_buff,
            size_t *     const out_buff_len) {

    size_t i, length, out_pos;
    unsigned char init, con1, con2;
    wchar_t wc;

    XMLRPC_ASSERT_ENV_OK(env);
    XMLRPC_ASSERT_PTR_OK(utf8_data);
    XMLRPC_ASSERT((!io_buff && !out_buff_len) ||
                  (io_buff && out_buff_len));

    /* Suppress GCC warning about possibly undefined variable. */
    wc = 0;

    i = 0;
    out_pos = 0;
    while (i < utf8_len) {
        /* Work with unsigned bytes so that the value can index the
        ** sequence-length table directly, whatever the signedness of
        ** plain char. */
        init = (unsigned char) utf8_data[i];
        if ((init & 0x80) == 0x00) {
            /* Convert ASCII character to wide character. */
            wc = init;
            i++;
        } else {
            /* Look up the length of this UTF-8 sequence. */
            length = utf8_seq_length[init];

            /* Check to make sure we have enough bytes to convert.  This
            ** must come before the continuation bytes are read below. */
            if (i + length > utf8_len) {
                XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                            "Truncated UTF-8 sequence");
            }

            /* Decode a multibyte UTF-8 sequence. */
            switch (length) {
            case 0:
                XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                            "Invalid UTF-8 initial byte");

            case 2:
                /* 110xxxxx 10xxxxxx */
                con1 = (unsigned char) utf8_data[i+1];
                if (!IS_CONTINUATION(con1)) {
                    XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                                "UTF-8 sequence too short");
                }
                wc = ((((wchar_t) (init & 0x1F)) <<  6) |
                      (((wchar_t) (con1 & 0x3F))));
                break;

            case 3:
                /* 1110xxxx 10xxxxxx 10xxxxxx */
                con1 = (unsigned char) utf8_data[i+1];
                con2 = (unsigned char) utf8_data[i+2];
                if (!IS_CONTINUATION(con1) || !IS_CONTINUATION(con2)) {
                    XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                                "UTF-8 sequence too short");
                }
                wc = ((((wchar_t) (init & 0x0F)) << 12) |
                      (((wchar_t) (con1 & 0x3F)) <<  6) |
                      (((wchar_t) (con2 & 0x3F))));
                break;

            case 4:
                /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
            case 5:
                /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
            case 6:
                /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
                XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                            "UCS-4 characters not supported");

            default:
                /* Unreachable unless utf8_seq_length is corrupt.  The
                ** assertion must test a false expression: asserting the
                ** bare string literal would always succeed.  We also fail
                ** explicitly so that a build without assertions cannot
                ** carry on and index utf8_min_char_for_length out of
                ** bounds below. */
                XMLRPC_ASSERT(0 && "Error in UTF-8 decoder tables");
                XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
                            "Error in UTF-8 decoder tables");
            }

            /* Advance to the end of the sequence. */
            i += length;

            /* Check for illegal UCS-2 characters. */
            if (wc > UCS2_MAX_LEGAL_CHARACTER) {
                XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                            "UCS-2 characters > U+FFFD are illegal");
            }

            /* Check for UTF-16 surrogates. */
            if (UTF16_FIRST_SURROGATE <= wc && wc <= UTF16_LAST_SURROGATE) {
                XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                            "UTF-16 surrogates may not appear in UTF-8 data");
            }

            /* Check for overlong sequences.  Only lengths 2 and 3 can
            ** reach this point, so the table index is in range. */
            if (wc < utf8_min_char_for_length[length]) {
                XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,
                            "Overlong UTF-8 sequence not allowed");
            }
        }

        /* If we have a buffer, write our character to it. */
        if (io_buff) {
            io_buff[out_pos++] = wc;
        }
    }

    /* Record the number of characters we found. */
    if (out_buff_len)
        *out_buff_len = out_pos;

 cleanup:
    /* On failure, report an empty result rather than a partial count. */
    if (env->fault_occurred) {
        if (out_buff_len)
            *out_buff_len = 0;
    }
}


/*=========================================================================
**  xmlrpc_validate_utf8
**=========================================================================
**  Make sure that a UTF-8 string is valid.
**
**  Parameters:
**    env       - Fault environment.  If the data is not acceptable, the
**                decoder records an XMLRPC_INVALID_UTF8_ERROR fault here.
**    utf8_data - The bytes to check.
**    utf8_len  - Number of bytes in utf8_data.
**
**  Nothing is returned; callers inspect env to learn the outcome.
*/

void 
xmlrpc_validate_utf8 (xmlrpc_env * const env,
                      const char * const utf8_data,
                      size_t       const utf8_len) {

    /* Passing NULL for both the output buffer and the output length puts
    ** decode_utf8 into validate-only mode: it checks everything but
    ** stores nothing. */
    decode_utf8(env, utf8_data, utf8_len, NULL, NULL);
}


/*=========================================================================
**  xmlrpc_utf8_to_wcs
**=========================================================================
**  Decode UTF-8 string to a "wide character string".  This function
**  returns an xmlrpc_mem_block with an element type of wchar_t.  Don't
**  try to interpret the block in a bytewise fashion--it won't work in
**  any useful or portable fashion.
**
**  Parameters:
**    env       - Fault environment; receives the fault on any failure.
**    utf8_data - The UTF-8 bytes to decode.
**    utf8_len  - Number of bytes in utf8_data.
**
**  Returns a newly allocated memory block holding the decoded characters
**  (the caller is responsible for freeing it), or NULL if a fault
**  occurred.
*/

xmlrpc_mem_block *xmlrpc_utf8_to_wcs (xmlrpc_env *env,
                                      char *utf8_data,
                                      size_t utf8_len)
{
    /* Start from NULL so the cleanup code can tell whether there is
    ** anything to free, even if we fail before allocating. */
    xmlrpc_mem_block *output = NULL;
    size_t wcs_length = 0;

    XMLRPC_ASSERT_ENV_OK(env);
    XMLRPC_ASSERT_PTR_OK(utf8_data);

    /* Refuse lengths whose buffer size cannot be represented in a size_t.
    ** NOTE(review): this presumes the typed mem-block macros size the
    ** block as sizeof(wchar_t) * utf8_len -- confirm against xmlrpc.h.
    ** If that product wrapped, the block would be too small and the
    ** decoder would write past its end. */
    if (utf8_len > ((size_t) -1) / sizeof(wchar_t)) {
        XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
                    "UTF-8 string too long to decode");
    }

    /* Allocate a memory block large enough to hold any possible output.
    ** We assume that each byte of the input may decode to a wchar_t. */
    output = XMLRPC_TYPED_MEM_BLOCK_NEW(wchar_t, env, utf8_len);
    XMLRPC_FAIL_IF_FAULT(env);

    /* Decode the UTF-8 data. */
    decode_utf8(env, utf8_data, utf8_len,
                XMLRPC_TYPED_MEM_BLOCK_CONTENTS(wchar_t, output),
                &wcs_length);
    XMLRPC_FAIL_IF_FAULT(env);

    /* Make sure we didn't overrun our buffer. */
    XMLRPC_ASSERT(wcs_length <= utf8_len);

    /* Correct the length of the memory block. */
    XMLRPC_TYPED_MEM_BLOCK_RESIZE(wchar_t, env, output, wcs_length);
    XMLRPC_FAIL_IF_FAULT(env);

 cleanup:
    if (env->fault_occurred) {
        if (output)
            xmlrpc_mem_block_free(output);
        return NULL;
    }
    return output;
}


/*=========================================================================
**  xmlrpc_wcs_to_utf8
**=========================================================================
**  Encode a "wide character string" as UTF-8.
**
**  Parameters:
**    env      - Fault environment; receives the fault on any failure.
**    wcs_data - The wide characters to encode.
**    wcs_len  - Number of wchar_t elements in wcs_data.
**
**  Returns a newly allocated memory block holding the UTF-8 bytes (the
**  caller is responsible for freeing it), or NULL if a fault occurred.
**  Characters above 0xFFFF and negative wchar_t values cannot be encoded
**  and produce an XMLRPC_INTERNAL_ERROR fault.
**
**  NOTE(review): unlike decode_utf8, this routine does not reject UTF-16
**  surrogates (0xD800-0xDFFF) or 0xFFFE/0xFFFF, so it can emit byte
**  sequences which xmlrpc_validate_utf8 would refuse.  Left unchanged
**  because callers may depend on it -- confirm before tightening.
*/

xmlrpc_mem_block *xmlrpc_wcs_to_utf8 (xmlrpc_env *env,
                                      wchar_t *wcs_data,
                                      size_t wcs_len)
{
    size_t estimate, bytes_used, i;
    /* Start from NULL so the cleanup code can tell whether there is
    ** anything to free, even if we fail before allocating. */
    xmlrpc_mem_block *output = NULL;
    unsigned char *buffer;
    long code;

    XMLRPC_ASSERT_ENV_OK(env);
    XMLRPC_ASSERT_PTR_OK(wcs_data);

    /* Guard the size computation below: if the multiplication wrapped
    ** around, the buffer would be too small for the encoded output. */
    if (wcs_len > ((size_t) -1) / MAX_ENCODED_BYTES) {
        XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
                    "Wide character string too long to encode as UTF-8");
    }

    /* Allocate a memory block large enough to hold any possible output.
    ** We assume that every wchar might encode to the maximum length. */
    estimate = wcs_len * MAX_ENCODED_BYTES;
    output = XMLRPC_TYPED_MEM_BLOCK_NEW(char, env, estimate);
    XMLRPC_FAIL_IF_FAULT(env);

    /* Output our characters. */
    buffer = (unsigned char*) XMLRPC_TYPED_MEM_BLOCK_CONTENTS(char, output);
    bytes_used = 0;
    for (i = 0; i < wcs_len; i++) {
        /* Widen to long: it is at least 32 bits, so every value of a
        ** 16-bit wchar_t fits, and the range checks stay valid whether
        ** wchar_t is signed or unsigned. */
        code = (long) wcs_data[i];
        if (code < 0) {
            /* A negative value would otherwise pass the "<= 0x7F" test
            ** and be silently emitted as an unrelated ASCII byte. */
            XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
                        "Can't encode a negative wide character value");
        } else if (code <= 0x007F) {
            /* 0xxxxxxx */
            buffer[bytes_used++] = (unsigned char) code;
        } else if (code <= 0x07FF) {
            /* 110xxxxx 10xxxxxx */
            buffer[bytes_used++] = (unsigned char) (0xC0 | (code >> 6));
            buffer[bytes_used++] = (unsigned char) (0x80 | (code & 0x3F));
        } else if (code <= 0xFFFF) {
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            buffer[bytes_used++] = (unsigned char) (0xE0 | (code >> 12));
            buffer[bytes_used++] =
                (unsigned char) (0x80 | ((code >> 6) & 0x3F));
            buffer[bytes_used++] = (unsigned char) (0x80 | (code & 0x3F));
        } else {
            XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,
                        "Don't know how to encode UCS-4 characters yet");
        }
    }

    /* Make sure we didn't overrun our buffer. */
    XMLRPC_ASSERT(bytes_used <= estimate);

    /* Correct the length of the memory block. */
    XMLRPC_TYPED_MEM_BLOCK_RESIZE(char, env, output, bytes_used);
    XMLRPC_FAIL_IF_FAULT(env);

 cleanup:
    if (env->fault_occurred) {
        if (output)
            xmlrpc_mem_block_free(output);
        return NULL;
    }
    return output;
}

#endif /* HAVE_UNICODE_WCHAR */
import cmake 2.6.0 Signed-off-by: Pierre Habouzit <madcoder@debian.org> 16 years ago			`/* Copyright (C) 2001 by Eric Kidd. All rights reserved.`
			`**`
			`** Redistribution and use in source and binary forms, with or without`
			`** modification, are permitted provided that the following conditions`
			`** are met:`
			`** 1. Redistributions of source code must retain the above copyright`
			`** notice, this list of conditions and the following disclaimer.`
			`** 2. Redistributions in binary form must reproduce the above copyright`
			`** notice, this list of conditions and the following disclaimer in the`
			`** documentation and/or other materials provided with the distribution.`
			`** 3. The name of the author may not be used to endorse or promote products`
			`** derived from this software without specific prior written permission.`
			`**`
			** THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
			`** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`** ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE`
			`** FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL`
			`** DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS`
			`** OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)`
			`** HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT`
			`** LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY`
			`** OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF`
			`** SUCH DAMAGE. */`


			`/*=========================================================================`
			`** XML-RPC UTF-8 Utilities`
			`**=========================================================================`
			`** Routines for validating, encoding and decoding UTF-8 data. We try to`
			`** be very, very strict about invalid UTF-8 data.`
			`**`
			`** All of the code in this file assumes that your machine represents`
			`** wchar_t as a 16-bit (or wider) character containing UCS-2 data. If this`
			`** assumption is incorrect, you may need to replace this file.`
			`**`
			`** For lots of information on Unicode and UTF-8 decoding, see:`
			`** http://www.cl.cam.ac.uk/~mgk25/unicode.html`
			`*/`

			`#include "xmlrpc_config.h"`

			`#include "xmlrpc.h"`

			`#ifdef HAVE_UNICODE_WCHAR`

			`/*=========================================================================`
			`** Tables and Constants`
			`**=========================================================================`
			`** We use a variety of tables and constants to help decode and validate`
			`** UTF-8 data.`
			`*/`

			`/* The number of bytes in a UTF-8 sequence starting with the character used`
			`** as the array index. A zero entry indicates an illegal initial byte.`
			`** This table was generated using a Perl script and information from the`
			`** UTF-8 standard.`
			`**`
			`** Fredrik Lundh's UTF-8 decoder Python 2.0 uses a similar table. But`
			`** since Python 2.0 has the icky CNRI license, I regenerated this`
			`** table from scratch and wrote my own decoder. */`
			`static unsigned char utf8_seq_length[256] = {`
			`1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,`
			`1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,`
			`1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,`
			`1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,`
			`1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,`
			`1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,`
			`1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,`
			`1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,`
			`0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,`
			`0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,`
			`0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,`
			`0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,`
			`2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,`
			`2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,`
			`3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,`
			`4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0`
			`};`

			`/* The minimum legal character value for a UTF-8 sequence of the given`
			`** length. We have to check this to avoid accepting "overlong" UTF-8`
			`** sequences, which use more bytes than necessary to encode a given`
			`** character. Such sequences are commonly used by evil people to bypass`
			`** filters and security checks. This table is based on the UTF-8-test.txt`
			`** file by Markus Kuhn <mkuhn@acm.org>. */`
			`static wchar_t utf8_min_char_for_length[4] = {`
			`0, /* Length 0: Not used (meaningless) */`
			`0x0000, /* Length 1: Not used (special-cased) */`
			`0x0080, /* Length 2 */`
			`0x0800 /* Length 3 */`

			`#if 0`
			`/* These are only useful on systems where wchar_t is 32-bits wide`
			`** and supports full UCS-4. */`
			`0x00010000, /* Length 4 */`
			`0x00200000, /* Length 5 */`
			`0x04000000 /* Length 6 */`
			`#endif`
			`};`

			`/* This is the maximum legal 16-byte (UCS-2) character. Again, this`
			`** information is based on UTF-8-test.txt. */`
			`#define UCS2_MAX_LEGAL_CHARACTER (0xFFFD)`

			`/* First and last UTF-16 surrogate characters. These are not legal UCS-2`
			`** characters--they're used to code for UCS-4 characters when using`
			`** UTF-16. They should never appear in decoded UTF-8 data! Again, these`
			`** could hypothetically be used to bypass security measures on some machines.`
			`** Based on UTF-8-test.txt. */`
			`#define UTF16_FIRST_SURROGATE (0xD800)`
			`#define UTF16_LAST_SURROGATE (0xDFFF)`

			`/* Is the character 'c' a UTF-8 continuation character? */`
			`#define IS_CONTINUATION(c) (((c) & 0xC0) == 0x80)`

			`/* Maximum number of bytes needed to encode a supported character. */`
			`#define MAX_ENCODED_BYTES (3)`


			`/*=========================================================================`
			`** decode_utf8`
			`**=========================================================================`
			`** Internal routine which decodes (or validates) a UTF-8 string.`
			`** To validate, set io_buff and out_buff_len to NULL. To decode, allocate`
			`** a sufficiently large buffer, pass it as io_buff, and pass a pointer as`
			`** as out_buff_len. The data will be written to the buffer, and the`
			`** length to out_buff_len.`
			`**`
			`** We assume that wchar_t holds a single UCS-2 character in native-endian`
			`** byte ordering.`
			`*/`

			`static void`
			`decode_utf8(xmlrpc_env * const env,`
			`const char * const utf8_data,`
			`size_t const utf8_len,`
			`wchar_t * const io_buff,`
			`size_t * const out_buff_len) {`

			`size_t i, length, out_pos;`
			`char init, con1, con2;`
			`wchar_t wc;`

			`XMLRPC_ASSERT_ENV_OK(env);`
			`XMLRPC_ASSERT_PTR_OK(utf8_data);`
			`XMLRPC_ASSERT((!io_buff && !out_buff_len) \|\|`
			`(io_buff && out_buff_len));`

			`/* Suppress GCC warning about possibly undefined variable. */`
			`wc = 0;`

			`i = 0;`
			`out_pos = 0;`
			`while (i < utf8_len) {`
			`init = utf8_data[i];`
			`if ((init & 0x80) == 0x00) {`
			`/* Convert ASCII character to wide character. */`
			`wc = init;`
			`i++;`
			`} else {`
			`/* Look up the length of this UTF-8 sequence. */`
			`length = utf8_seq_length[(unsigned char) init];`

			`/* Check to make sure we have enough bytes to convert. */`
			`if (i + length > utf8_len)`
			`XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,`
			`"Truncated UTF-8 sequence");`

			`/* Decode a multibyte UTF-8 sequence. */`
			`switch (length) {`
			`case 0:`
			`XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,`
			`"Invalid UTF-8 initial byte");`

			`case 2:`
			`/* 110xxxxx 10xxxxxx */`
			`con1 = utf8_data[i+1];`
			`if (!IS_CONTINUATION(con1))`
			`XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,`
			`"UTF-8 sequence too short");`
			`wc = ((((wchar_t) (init & 0x1F)) << 6) \|`
			`(((wchar_t) (con1 & 0x3F))));`
			`break;`

			`case 3:`
			`/* 1110xxxx 10xxxxxx 10xxxxxx */`
			`con1 = utf8_data[i+1];`
			`con2 = utf8_data[i+2];`
			`if (!IS_CONTINUATION(con1) \|\| !IS_CONTINUATION(con2))`
			`XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,`
			`"UTF-8 sequence too short");`
			`wc = ((((wchar_t) (init & 0x0F)) << 12) \|`
			`(((wchar_t) (con1 & 0x3F)) << 6) \|`
			`(((wchar_t) (con2 & 0x3F))));`
			`break;`

			`case 4:`
			`/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */`
			`case 5:`
			`/* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */`
			`case 6:`
			`/* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */`
			`XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,`
			`"UCS-4 characters not supported");`

			`default:`
			`XMLRPC_ASSERT("Error in UTF-8 decoder tables");`
			`}`

			`/* Advance to the end of the sequence. */`
			`i += length;`

			`/* Check for illegal UCS-2 characters. */`
			`if (wc > UCS2_MAX_LEGAL_CHARACTER)`
			`XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,`
			`"UCS-2 characters > U+FFFD are illegal");`

			`/* Check for UTF-16 surrogates. */`
			`if (UTF16_FIRST_SURROGATE <= wc && wc <= UTF16_LAST_SURROGATE)`
			`XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,`
			`"UTF-16 surrogates may not appear in UTF-8 data");`

			`/* Check for overlong sequences. */`
			`if (wc < utf8_min_char_for_length[length])`
			`XMLRPC_FAIL(env, XMLRPC_INVALID_UTF8_ERROR,`
			`"Overlong UTF-8 sequence not allowed");`
			`}`

			`/* If we have a buffer, write our character to it. */`
			`if (io_buff) {`
			`io_buff[out_pos++] = wc;`
			`}`
			`}`

			`/* Record the number of characters we found. */`
			`if (out_buff_len)`
			`*out_buff_len = out_pos;`

			`cleanup:`
			`if (env->fault_occurred) {`
			`if (out_buff_len)`
			`*out_buff_len = 0;`
			`}`
			`}`



			`/*=========================================================================`
			`** xmlrpc_validate_utf8`
			`**=========================================================================`
			`** Make sure that a UTF-8 string is valid.`
			`*/`

			`void`
			`xmlrpc_validate_utf8 (xmlrpc_env * const env,`
			`const char * const utf8_data,`
			`size_t const utf8_len) {`

			`decode_utf8(env, utf8_data, utf8_len, NULL, NULL);`
			`}`


			`/*=========================================================================`
			`** xmlrpc_utf8_to_wcs`
			`**=========================================================================`
			`** Decode UTF-8 string to a "wide character string". This function`
			`** returns an xmlrpc_mem_block with an element type of wchar_t. Don't`
			`** try to intepret the block in a bytewise fashion--it won't work in`
			`** any useful or portable fashion.`
			`*/`

			`xmlrpc_mem_block xmlrpc_utf8_to_wcs (xmlrpc_env env,`
			`char *utf8_data,`
			`size_t utf8_len)`
			`{`
			`xmlrpc_mem_block *output;`
			`size_t wcs_length;`

			`/* Allocate a memory block large enough to hold any possible output.`
			`** We assume that each byte of the input may decode to a whcar_t. */`
			`output = XMLRPC_TYPED_MEM_BLOCK_NEW(wchar_t, env, utf8_len);`
			`XMLRPC_FAIL_IF_FAULT(env);`

			`/* Decode the UTF-8 data. */`
			`decode_utf8(env, utf8_data, utf8_len,`
			`XMLRPC_TYPED_MEM_BLOCK_CONTENTS(wchar_t, output),`
			`&wcs_length);`
			`XMLRPC_FAIL_IF_FAULT(env);`

			`/* Make sure we didn't overrun our buffer. */`
			`XMLRPC_ASSERT(wcs_length <= utf8_len);`

			`/* Correct the length of the memory block. */`
			`XMLRPC_TYPED_MEM_BLOCK_RESIZE(wchar_t, env, output, wcs_length);`
			`XMLRPC_FAIL_IF_FAULT(env);`

			`cleanup:`
			`if (env->fault_occurred) {`
			`if (output)`
			`xmlrpc_mem_block_free(output);`
			`return NULL;`
			`}`
			`return output;`
			`}`


			`/*=========================================================================`
			`** xmlrpc_utf8_to_wcs`
			`**=========================================================================`
			`** Encode a "wide character string" as UTF-8.`
			`*/`

			`xmlrpc_mem_block xmlrpc_wcs_to_utf8 (xmlrpc_env env,`
			`wchar_t *wcs_data,`
			`size_t wcs_len)`
			`{`
			`size_t estimate, bytes_used, i;`
			`xmlrpc_mem_block *output;`
			`unsigned char *buffer;`
			`wchar_t wc;`
			`int cwc;`

			`XMLRPC_ASSERT_ENV_OK(env);`
			`XMLRPC_ASSERT_PTR_OK(wcs_data);`

			`/* Allocate a memory block large enough to hold any possible output.`
			`** We assume that every wchar might encode to the maximum length. */`
			`estimate = wcs_len * MAX_ENCODED_BYTES;`
			`output = XMLRPC_TYPED_MEM_BLOCK_NEW(char, env, estimate);`
			`XMLRPC_FAIL_IF_FAULT(env);`

			`/* Output our characters. */`
			`buffer = (unsigned char*) XMLRPC_TYPED_MEM_BLOCK_CONTENTS(char, output);`
			`bytes_used = 0;`
			`for (i = 0; i < wcs_len; i++) {`
			`wc = wcs_data[i];`
			`cwc = wc;`
			`if (cwc <= 0x007F) {`
			`buffer[bytes_used++] = wc & 0x7F;`
			`} else if (cwc <= 0x07FF) {`
			`/* 110xxxxx 10xxxxxx */`
			`buffer[bytes_used++] = 0xC0 \| (wc >> 6);`
			`buffer[bytes_used++] = 0x80 \| (wc & 0x3F);`
			`} else if (cwc <= 0xFFFF) {`
			`/* 1110xxxx 10xxxxxx 10xxxxxx */`
			`buffer[bytes_used++] = 0xE0 \| (wc >> 12);`
			`buffer[bytes_used++] = 0x80 \| ((wc >> 6) & 0x3F);`
			`buffer[bytes_used++] = 0x80 \| (wc & 0x3F);`
			`} else {`
			`XMLRPC_FAIL(env, XMLRPC_INTERNAL_ERROR,`
			`"Don't know how to encode UCS-4 characters yet");`
			`}`
			`}`

			`/* Make sure we didn't overrun our buffer. */`
			`XMLRPC_ASSERT(bytes_used <= estimate);`

			`/* Correct the length of the memory block. */`
			`XMLRPC_TYPED_MEM_BLOCK_RESIZE(char, env, output, bytes_used);`
			`XMLRPC_FAIL_IF_FAULT(env);`

			`cleanup:`
			`if (env->fault_occurred) {`
			`if (output)`
			`xmlrpc_mem_block_free(output);`
			`return NULL;`
			`}`
			`return output;`
			`}`

			`#endif /* HAVE_UNICODE_WCHAR */`