scuffed-code/icu4c/source/common/ustring.c

/*
******************************************************************************
*
*   Copyright (C) 1998-2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File ustring.h
*
* Modification History:
*
*   Date        Name        Description
*   12/07/98    bertrand    Creation.
******************************************************************************
*/

#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/putil.h"
#include "unicode/ucnv.h"
#include "cstring.h"
#include "cwchar.h"
#include "cmemory.h"
#include "umutex.h"
#include "ustr_imp.h"
#include "ucln_cmn.h"

/* forward declaractions of definitions for the shared default converter */

static UConverter *gDefaultConverter = NULL;

/* ANSI string.h - style functions ------------------------------------------ */

#define MAX_STRLEN 0x0FFFFFFF

/* ---- String searching functions ---- */

U_CAPI UChar* U_EXPORT2
u_strchr(const UChar *s, UChar c) 
{
  while (*s && *s != c) {
    ++s;
  }
  if (*s == c)
    return (UChar *)s;
  return NULL;
}

/* A Boyer-Moore algorithm would be better, but that would require a hashtable
   because UChar is so big. This algorithm doesn't use a lot of extra memory.
 */
U_CAPI UChar * U_EXPORT2
u_strstr(const UChar *s, const UChar *substring) {

  UChar *strItr, *subItr;

  if (*substring == 0) {
    return (UChar *)s;
  }

  do {
    strItr = (UChar *)s;
    subItr = (UChar *)substring;

    /* Only one string iterator needs checking for null terminator */
    while ((*strItr != 0) && (*strItr == *subItr)) {
      strItr++;
      subItr++;
    }

    if (*subItr == 0) {             /* Was the end of the substring reached? */
      return (UChar *)s;
    }

    s++;
  } while (*strItr != 0);           /* Was the end of the string reached? */

  return NULL;                      /* No match */
}

/**
 * Check if there is an unmatched surrogate c in a string [start..limit[ at s.
 * start<=s<limit or limit==NULL
 * @return TRUE if *s is unmatched
 */
static U_INLINE UBool
uprv_isSingleSurrogate(const UChar *start, const UChar *s, UChar c, const UChar *limit) {
    if(UTF_IS_SURROGATE_FIRST(c)) {
        ++s;
        return (UBool)(s==limit || !UTF_IS_TRAIL(*s));
    } else {
        return (UBool)(s==start || !UTF_IS_LEAD(*(s-1)));
    }
}

U_CFUNC const UChar *
uprv_strFindSurrogate(const UChar *s, int32_t length, UChar surrogate) {
    const UChar *limit, *t;
    UChar c;

    if(length>=0) {
        limit=s+length;
    } else {
        limit=NULL;
    }

    for(t=s; t!=limit && ((c=*t)!=0 || limit!=NULL); ++t) {
        if(c==surrogate && uprv_isSingleSurrogate(s, t, c, limit)) {
            return t;
        }
    }

    return NULL;
}

U_CFUNC const UChar *
uprv_strFindLastSurrogate(const UChar *s, int32_t length, UChar surrogate) {
    const UChar *limit, *t;
    UChar c;

    if(length>=0) {
        limit=s+length;
    } else {
        limit=s+u_strlen(s);
    }

    for(t=limit; t!=s;) {
        c=*--t;
        if(c==surrogate && uprv_isSingleSurrogate(s, t, c, limit)) {
            return t;
        }
    }

    return NULL;
}

U_CAPI UChar * U_EXPORT2
u_strchr32(const UChar *s, UChar32 c) {
  if(c < 0xd800) {
    /* non-surrogate BMP code point */
    return u_strchr(s, (UChar)c);
  } else if(c <= 0xdfff) {
    /* surrogate code point */
    return (UChar *)uprv_strFindSurrogate(s, -1, (UChar)c);
  } else if(c <= 0xffff) {
    /* non-surrogate BMP code point */
    return u_strchr(s, (UChar)c);
  } else {
    /* supplementary code point, search for string */
    UChar buffer[3];

    buffer[0] = UTF16_LEAD(c);
    buffer[1] = UTF16_TRAIL(c);
    buffer[2] = 0;
    return u_strstr(s, buffer);
  }
}

/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
U_CAPI UChar * U_EXPORT2
u_strpbrk(const UChar *string, const UChar *matchSet)
{
    int32_t matchLen;
    UBool single = TRUE;

    for (matchLen = 0; matchSet[matchLen]; matchLen++)
    {
        if (!UTF_IS_SINGLE(matchSet[matchLen]))
        {
            single = FALSE;
        }
    }

    if (single)
    {
        const UChar *matchItr;
        const UChar *strItr;

        for (strItr = string; *strItr; strItr++)
        {
            for (matchItr = matchSet; *matchItr; matchItr++)
            {
                if (*matchItr == *strItr)
                {
                    return (UChar *)strItr;
                }
            }
        }
    }
    else
    {
        int32_t matchItr;
        int32_t strItr;
        UChar32 stringCh, matchSetCh;
        int32_t stringLen = u_strlen(string);

        for (strItr = 0; strItr < stringLen; strItr++)
        {
            UTF_GET_CHAR_SAFE(string, 0, strItr, stringLen, stringCh, TRUE);
            for (matchItr = 0; matchItr < matchLen; matchItr++)
            {
                UTF_GET_CHAR_SAFE(matchSet, 0, matchItr, matchLen, matchSetCh, TRUE);
                if (stringCh == matchSetCh && (stringCh != UTF_ERROR_VALUE
                    || string[strItr] == UTF_ERROR_VALUE
                    || (matchSetCh == UTF_ERROR_VALUE && !UTF_IS_SINGLE(matchSet[matchItr]))))
                {
                    return (UChar *)string + strItr;
                }
            }
        }
    }

    /* Didn't find it. */
    return NULL;
}

/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
U_CAPI int32_t U_EXPORT2
u_strcspn(const UChar *string, const UChar *matchSet)
{
    const UChar *foundStr = u_strpbrk(string, matchSet);
    if (foundStr == NULL)
    {
        return u_strlen(string);
    }
    return foundStr - string;
}

/* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
U_CAPI int32_t U_EXPORT2
u_strspn(const UChar *string, const UChar *matchSet)
{
    UBool single = TRUE;
    UBool match = TRUE;
    int32_t matchLen;
    int32_t retValue;

    for (matchLen = 0; matchSet[matchLen]; matchLen++)
    {
        if (!UTF_IS_SINGLE(matchSet[matchLen]))
        {
            single = FALSE;
        }
    }

    if (single)
    {
        const UChar *matchItr;
        const UChar *strItr;

        for (strItr = string; *strItr && match; strItr++)
        {
            match = FALSE;
            for (matchItr = matchSet; *matchItr; matchItr++)
            {
                if (*matchItr == *strItr)
                {
                    match = TRUE;
                    break;
                }
            }
        }
        retValue = strItr - string - (match == FALSE);
    }
    else
    {
        int32_t matchItr;
        int32_t strItr;
        UChar32 stringCh, matchSetCh;
        int32_t stringLen = u_strlen(string);

        for (strItr = 0; strItr < stringLen && match; strItr++)
        {
            match = FALSE;
            UTF_GET_CHAR_SAFE(string, 0, strItr, stringLen, stringCh, TRUE);
            for (matchItr = 0; matchItr < matchLen; matchItr++)
            {
                UTF_GET_CHAR_SAFE(matchSet, 0, matchItr, matchLen, matchSetCh, TRUE);
                if (stringCh == matchSetCh && (stringCh != UTF_ERROR_VALUE
                    || string[strItr] == UTF_ERROR_VALUE
                    || (matchSetCh == UTF_ERROR_VALUE && !UTF_IS_SINGLE(matchSet[matchItr]))))
                {
                    match = TRUE;
                    break;
                }
            }
        }
        retValue = strItr - (match == FALSE);
    }

    /* Found a mismatch or didn't find it. */
    return retValue;
}

/* ----- Text manipulation functions --- */

U_CAPI UChar* U_EXPORT2
u_strtok_r(UChar    *src, 
     const UChar    *delim,
           UChar   **saveState)
{
    UChar *tokSource;
    UChar *nextToken;
    uint32_t nonDelimIdx;

    /* If saveState is NULL, the user messed up. */
    if (src != NULL) {
        tokSource = src;
        *saveState = src; /* Set to "src" in case there are no delimiters */
    }
    else if (*saveState) {
        tokSource = *saveState;
    }
    else {
        /* src == NULL && *saveState == NULL */
        /* This shouldn't happen. We already finished tokenizing. */
        return NULL;
    }

    /* Skip initial delimiters */
    nonDelimIdx = u_strspn(tokSource, delim);
    tokSource = &tokSource[nonDelimIdx];

    if (*tokSource) {
        nextToken = u_strpbrk(tokSource, delim);
        if (nextToken != NULL) {
            /* Create a token */
            *(nextToken++) = 0;
            *saveState = nextToken;
            return tokSource;
        }
        else if (*saveState) {
            /* Return the last token */
            *saveState = NULL;
            return tokSource;
        }
    }
    else {
        /* No tokens were found. Only delimiters were left. */
        *saveState = NULL;
    }
    return NULL;
}

U_CAPI UChar* U_EXPORT2
u_strcat(UChar     *dst, 
    const UChar     *src)
{
    UChar *anchor = dst;            /* save a pointer to start of dst */

    while(*dst != 0) {              /* To end of first string          */
        ++dst;
    }
    while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
    }

    return anchor;
}

U_CAPI UChar*  U_EXPORT2
u_strncat(UChar     *dst, 
     const UChar     *src, 
     int32_t     n ) 
{
    if(n > 0) {
        UChar *anchor = dst;            /* save a pointer to start of dst */

        while(*dst != 0) {              /* To end of first string          */
            ++dst;
        }
        while((*dst = *src) != 0) {     /* copy string 2 over              */
            ++dst;
            if(--n == 0) {
                *dst = 0;
                break;
            }
            ++src;
        }

        return anchor;
    } else {
        return dst;
    }
}

/* ----- Text property functions --- */

U_CAPI int32_t   U_EXPORT2
u_strcmp(const UChar *s1, 
    const UChar *s2) 
{
    UChar  c1, c2;

    for(;;) {
        c1=*s1++;
        c2=*s2++;
        if (c1 != c2 || c1 == 0) {
            break;
        }
    }
    return (int32_t)c1 - (int32_t)c2;
}

U_CAPI int32_t U_EXPORT2
uprv_strCompare(const UChar *s1, int32_t length1,
                const UChar *s2, int32_t length2,
                UBool strncmpStyle, UBool codePointOrder) {
    const UChar *start1, *start2, *limit1, *limit2;
    UChar c1, c2;

    /* setup for fix-up */
    start1=s1;
    start2=s2;

    /* compare identical prefixes - they do not need to be fixed up */
    if(length1<0 && length2<0) {
        /* strcmp style, both NUL-terminated */
        if(s1==s2) {
            return 0;
        }

        for(;;) {
            c1=*s1;
            c2=*s2;
            if(c1!=c2) {
                break;
            }
            if(c1==0) {
                return 0;
            }
            ++s1;
            ++s2;
        }

        /* setup for fix-up */
        limit1=limit2=NULL;
    } else if(strncmpStyle) {
        /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
        if(s1==s2) {
            return 0;
        }

        limit1=start1+length1;

        for(;;) {
            /* both lengths are same, check only one limit */
            if(s1==limit1) {
                return 0;
            }

            c1=*s1;
            c2=*s2;
            if(c1!=c2) {
                break;
            }
            if(c1==0) {
                return 0;
            }
            ++s1;
            ++s2;
        }

        /* setup for fix-up */
        limit2=start2+length1; /* use length1 here, too, to enforce assumption */
    } else {
        /* memcmp/UnicodeString style, both length-specified */
        int32_t lengthResult;

        if(length1<0) {
            length1=u_strlen(s1);
        }
        if(length2<0) {
            length2=u_strlen(s2);
        }

        /* limit1=start1+min(lenght1, length2) */
        if(length1<length2) {
            lengthResult=-1;
            limit1=start1+length1;
        } else if(length1==length2) {
            lengthResult=0;
            limit1=start1+length1;
        } else /* length1>length2 */ {
            lengthResult=1;
            limit1=start1+length2;
        }

        if(s1==s2) {
            return lengthResult;
        }

        for(;;) {
            /* check pseudo-limit */
            if(s1==limit1) {
                return lengthResult;
            }

            c1=*s1;
            c2=*s2;
            if(c1!=c2) {
                break;
            }
            ++s1;
            ++s2;
        }

        /* setup for fix-up */
        limit1=start1+length1;
        limit2=start2+length2;
    }

    /* if both values are in or above the surrogate range, fix them up */
    if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
        /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
        if(
            (c1<=0xdbff && (s1+1)!=limit1 && UTF_IS_TRAIL(*(s1+1))) ||
            (UTF_IS_TRAIL(c1) && start1!=s1 && UTF_IS_LEAD(*(s1-1)))
        ) {
            /* part of a surrogate pair, leave >=d800 */
        } else {
            /* BMP code point - may be surrogate code point - make <d800 */
            c1-=0x2800;
        }

        if(
            (c2<=0xdbff && (s2+1)!=limit2 && UTF_IS_TRAIL(*(s2+1))) ||
            (UTF_IS_TRAIL(c2) && start2!=s2 && UTF_IS_LEAD(*(s2-1)))
        ) {
            /* part of a surrogate pair, leave >=d800 */
        } else {
            /* BMP code point - may be surrogate code point - make <d800 */
            c2-=0x2800;
        }
    }

    /* now c1 and c2 are in UTF-32-compatible order */
    return (int32_t)c1-(int32_t)c2;
}

U_CAPI int32_t U_EXPORT2
u_strCompare(const UChar *s1, int32_t length1,
             const UChar *s2, int32_t length2,
             UBool codePointOrder) {
    /* argument checking */
    if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
        return 0;
    }
    return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
}

/* String compare in code point order - u_strcmp() compares in code unit order. */
U_CAPI int32_t U_EXPORT2
u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
    return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
}

U_CAPI int32_t   U_EXPORT2
u_strncmp(const UChar     *s1, 
     const UChar     *s2, 
     int32_t     n) 
{
    if(n > 0) {
        int32_t rc;
        for(;;) {
            rc = (int32_t)*s1 - (int32_t)*s2;
            if(rc != 0 || *s1 == 0 || --n == 0) {
                return rc;
            }
            ++s1;
            ++s2;
        }
    } else {
        return 0;
    }
}

U_CAPI int32_t U_EXPORT2
u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
    return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
}

U_CAPI UChar* U_EXPORT2
u_strcpy(UChar     *dst, 
    const UChar     *src) 
{
    UChar *anchor = dst;            /* save a pointer to start of dst */

    while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
    }

    return anchor;
}

U_CAPI UChar*  U_EXPORT2
u_strncpy(UChar     *dst, 
     const UChar     *src, 
     int32_t     n) 
{
    UChar *anchor = dst;            /* save a pointer to start of dst */

    /* copy string 2 over */
    while(n > 0 && (*(dst++) = *(src++)) != 0) {
        --n;
    }

    return anchor;
}

U_CAPI int32_t   U_EXPORT2
u_strlen(const UChar *s) 
{
#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
    return uprv_wcslen(s);
#else
    const UChar *t = s;
    while(*t != 0) {
      ++t;
    }
    return t - s;
#endif
}

U_CAPI int32_t U_EXPORT2
u_countChar32(const UChar *s, int32_t length) {
    int32_t count;

    if(s==NULL || length<-1) {
        return 0;
    }

    count=0;
    if(length>=0) {
        while(length>0) {
            ++count;
            if(UTF_IS_LEAD(*s) && length>=2 && UTF_IS_TRAIL(*(s+1))) {
                s+=2;
                length-=2;
            } else {
                ++s;
                --length;
            }
        }
    } else /* length==-1 */ {
        UChar c;

        for(;;) {
            if((c=*s++)==0) {
                break;
            }
            ++count;

            /*
             * sufficient to look ahead one because of UTF-16;
             * safe to look ahead one because at worst that would be the terminating NUL
             */
            if(UTF_IS_LEAD(c) && UTF_IS_TRAIL(*s)) {
                ++s;
            }
        }
    }
    return count;
}

U_CAPI UChar * U_EXPORT2
u_memcpy(UChar *dest, const UChar *src, int32_t count) {
    return (UChar *)uprv_memcpy(dest, src, count*U_SIZEOF_UCHAR);
}

U_CAPI UChar * U_EXPORT2
u_memmove(UChar *dest, const UChar *src, int32_t count) {
    return (UChar *)uprv_memmove(dest, src, count*U_SIZEOF_UCHAR);
}

U_CAPI UChar * U_EXPORT2
u_memset(UChar *dest, UChar c, int32_t count) {
    if(count > 0) {
        UChar *ptr = dest;
        UChar *limit = dest + count;

        while (ptr < limit) {
            *(ptr++) = c;
        }
    }
    return dest;
}

U_CAPI int32_t U_EXPORT2
u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
    if(count > 0) {
        const UChar *limit = buf1 + count;
        int32_t result;

        while (buf1 < limit) {
            result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2;
            if (result != 0) {
                return result;
            }
            buf1++;
            buf2++;
        }
    }
    return 0;
}

U_CAPI int32_t U_EXPORT2
u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
    return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
}

U_CAPI UChar * U_EXPORT2
u_memchr(const UChar *src, UChar ch, int32_t count) {
    if(count > 0) {
        const UChar *ptr = src;
        const UChar *limit = src + count;

        do {
            if (*ptr == ch) {
                return (UChar *)ptr;
            }
        } while (++ptr < limit);
    }
    return NULL;
}

U_CAPI UChar * U_EXPORT2
u_memchr32(const UChar *src, UChar32 ch, int32_t count) {
    if(count<=0 || (uint32_t)ch>0x10ffff) {
        return NULL; /* no string, or illegal arguments */
    }

    if(ch<0xd800) {
        /* non-surrogate BMP code point */
        return u_memchr(src, (UChar)ch, count); /* BMP, single UChar */
    } else if(ch<=0xdfff) {
        /* surrogate code point */
        return (UChar *)uprv_strFindSurrogate(src, count, (UChar)ch);
    } else if(ch<=0xffff) {
        return u_memchr(src, (UChar)ch, count); /* BMP, single UChar */
    } else if(count<2) {
        return NULL; /* too short for a surrogate pair */
    } else {
        const UChar *limit=src+count-1; /* -1 so that we do not need a separate check for the trail unit */
        UChar lead=UTF16_LEAD(ch), trail=UTF16_TRAIL(ch);

        do {
            if(*src==lead && *(src+1)==trail) {
                return (UChar *)src;
            }
        } while(++src<limit);
        return NULL;
    }
}

/* conversions between char* and UChar* ------------------------------------- */

/*
 returns the minimum of (the length of the null-terminated string) and n.
*/
static int32_t u_astrnlen(const char *s1, int32_t n)
{
    int32_t len = 0;

    if (s1)
    {
        while (*(s1++) && n--)
        {
            len++;
        }
    }
    return len;
}

U_CAPI UChar*  U_EXPORT2
u_uastrncpy(UChar *ucs1,
           const char *s2,
           int32_t n)
{
  UChar *target = ucs1;
  UErrorCode err = U_ZERO_ERROR;
  UConverter *cnv = u_getDefaultConverter(&err);
  if(U_SUCCESS(err) && cnv != NULL) {
    ucnv_reset(cnv);
    ucnv_toUnicode(cnv,
                   &target,
                   ucs1+n,
                   &s2,
                   s2+u_astrnlen(s2, n),
                   NULL,
                   TRUE,
                   &err);
    ucnv_reset(cnv); /* be good citizens */
    u_releaseDefaultConverter(cnv);
    if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) {
      *ucs1 = 0; /* failure */
    }
    if(target < (ucs1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */
      *target = 0;  /* terminate */
    }
  } else {
    *ucs1 = 0;
  }
  return ucs1;
}

U_CAPI UChar*  U_EXPORT2
u_uastrcpy(UChar *ucs1,
          const char *s2 )
{
  UErrorCode err = U_ZERO_ERROR;
  UConverter *cnv = u_getDefaultConverter(&err);
  if(U_SUCCESS(err) && cnv != NULL) {
    ucnv_toUChars(cnv,
                    ucs1,
                    MAX_STRLEN,
                    s2,
                    uprv_strlen(s2),
                    &err);
    u_releaseDefaultConverter(cnv);
    if(U_FAILURE(err)) {
      *ucs1 = 0;
    }
  } else {
    *ucs1 = 0;
  }
  return ucs1;
}

/*
 returns the minimum of (the length of the null-terminated string) and n.
*/
static int32_t u_ustrnlen(const UChar *ucs1, int32_t n)
{
    int32_t len = 0;

    if (ucs1)
    {
        while (n-- && *(ucs1++))
        {
            len++;
        }
    }
    return len;
}

U_CAPI char*  U_EXPORT2
u_austrncpy(char *s1,
        const UChar *ucs2,
        int32_t n)
{
  char *target = s1;
  UErrorCode err = U_ZERO_ERROR;
  UConverter *cnv = u_getDefaultConverter(&err);
  if(U_SUCCESS(err) && cnv != NULL) {
    ucnv_reset(cnv);
    ucnv_fromUnicode(cnv,
                  &target,
                  s1+n,
                  &ucs2,
                  ucs2+u_ustrnlen(ucs2, n),
                  NULL,
                  TRUE,
                  &err);
    ucnv_reset(cnv); /* be good citizens */
    u_releaseDefaultConverter(cnv);
    if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) {
      *s1 = 0; /* failure */
    }
    if(target < (s1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */
      *target = 0;  /* terminate */
    }
  } else {
    *s1 = 0;
  }
  return s1;
}

U_CAPI char*  U_EXPORT2
u_austrcpy(char *s1,
         const UChar *ucs2 )
{
  UErrorCode err = U_ZERO_ERROR;
  UConverter *cnv = u_getDefaultConverter(&err);
  if(U_SUCCESS(err) && cnv != NULL) {
    int32_t len = ucnv_fromUChars(cnv,
                  s1,
                  MAX_STRLEN,
                  ucs2,
                  -1,
                  &err);
    u_releaseDefaultConverter(cnv);
    s1[len] = 0;
  } else {
    *s1 = 0;
  }
  return s1;
}

/* mutexed access to a shared default converter ----------------------------- */

UBool ustring_cleanup(void) {
    if (gDefaultConverter) {
        ucnv_close(gDefaultConverter);
        gDefaultConverter = NULL;
    }
    
    /* it's safe to close a 0 converter  */
    return TRUE;
}

U_CAPI UConverter* U_EXPORT2
u_getDefaultConverter(UErrorCode *status)
{
    UConverter *converter = NULL;
    
    if (gDefaultConverter != NULL) {
        umtx_lock(NULL);
        
        /* need to check to make sure it wasn't taken out from under us */
        if (gDefaultConverter != NULL) {
            converter = gDefaultConverter;
            gDefaultConverter = NULL;
        }
        umtx_unlock(NULL);
    }

    /* if the cache was empty, create a converter */
    if(converter == NULL) {
        converter = ucnv_open(NULL, status);
        if(U_FAILURE(*status)) {
            return NULL;
        }
    }

    return converter;
}

U_CAPI void U_EXPORT2
u_releaseDefaultConverter(UConverter *converter)
{
  if(gDefaultConverter == NULL) {
    if (converter != NULL) {
      ucnv_reset(converter);
    }
    umtx_lock(NULL);

    if(gDefaultConverter == NULL) {
      gDefaultConverter = converter;
      converter = NULL;
    }
    umtx_unlock(NULL);
  }

  if(converter != NULL) {
    ucnv_close(converter);
  }
}

/* u_unescape & support fns ------------------------------------------------- */

/* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
static const UChar UNESCAPE_MAP[] = {
    /*"   0x22, 0x22 */
    /*'   0x27, 0x27 */
    /*?   0x3F, 0x3F */
    /*\   0x5C, 0x5C */
    /*a*/ 0x61, 0x07,
    /*b*/ 0x62, 0x08,
    /*f*/ 0x66, 0x0c,
    /*n*/ 0x6E, 0x0a,
    /*r*/ 0x72, 0x0d,
    /*t*/ 0x74, 0x09,
    /*v*/ 0x76, 0x0b
};
enum { UNESCAPE_MAP_LENGTH = sizeof(UNESCAPE_MAP) / sizeof(UNESCAPE_MAP[0]) };

/* Convert one octal digit to a numeric value 0..7, or -1 on failure */
static int8_t _digit8(UChar c) {
    if (c >= 0x0030 && c <= 0x0037) {
        return (int8_t)(c - 0x0030);
    }
    return -1;
}

/* Convert one hex digit to a numeric value 0..F, or -1 on failure */
static int8_t _digit16(UChar c) {
    if (c >= 0x0030 && c <= 0x0039) {
        return (int8_t)(c - 0x0030);
    }
    if (c >= 0x0041 && c <= 0x0046) {
        return (int8_t)(c - (0x0041 - 10));
    }
    if (c >= 0x0061 && c <= 0x0066) {
        return (int8_t)(c - (0x0061 - 10));
    }
    return -1;
}

/* Parse a single escape sequence.  Although this method deals in
 * UChars, it does not use C++ or UnicodeString.  This allows it to
 * be used from C contexts. */
U_CAPI UChar32 U_EXPORT2
u_unescapeAt(UNESCAPE_CHAR_AT charAt,
             int32_t *offset,
             int32_t length,
             void *context) {

    int32_t start = *offset;
    UChar c;
    UChar32 result = 0;
    int8_t n = 0;
    int8_t minDig = 0;
    int8_t maxDig = 0;
    int8_t bitsPerDigit = 4; 
    int8_t dig;
    int32_t i;

    /* Check that offset is in range */
    if (*offset < 0 || *offset >= length) {
        goto err;
    }

    /* Fetch first UChar after '\\' */
    c = charAt((*offset)++, context);

    /* Convert hexadecimal and octal escapes */
    switch (c) {
    case 0x0075 /*'u'*/:
        minDig = maxDig = 4;
        break;
    case 0x0055 /*'U'*/:
        minDig = maxDig = 8;
        break;
    case 0x0078 /*'x'*/:
        minDig = 1;
        maxDig = 2;
        break;
    default:
        dig = _digit8(c);
        if (dig >= 0) {
            minDig = 1;
            maxDig = 3;
            n = 1; /* Already have first octal digit */
            bitsPerDigit = 3;
            result = dig;
        }
        break;
    }
    if (minDig != 0) {
        while (*offset < length && n < maxDig) {
            c = charAt(*offset, context);
            dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
            if (dig < 0) {
                break;
            }
            result = (result << bitsPerDigit) | dig;
            ++(*offset);
            ++n;
        }
        if (n < minDig) {
            goto err;
        }
        return result;
    }

    /* Convert C-style escapes in table */
    for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
        if (c == UNESCAPE_MAP[i]) {
            return UNESCAPE_MAP[i+1];
        } else if (c < UNESCAPE_MAP[i]) {
            break;
        }
    }

    /* If no special forms are recognized, then consider
     * the backslash to generically escape the next character.
     * Deal with surrogate pairs. */
    if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
        UChar c2 = charAt(*offset, context);
        if (UTF_IS_SECOND_SURROGATE(c2)) {
            ++(*offset);
            return UTF16_GET_PAIR_VALUE(c, c2);
        }
    }
    return c;

 err:
    /* Invalid escape sequence */
    *offset = start; /* Reset to initial value */
    return (UChar32)0xFFFFFFFF;
}

/* u_unescapeAt() callback to return a UChar from a char* */
static UChar U_CALLCONV
_charPtr_charAt(int32_t offset, void *context) {
    UChar c16;
    /* It would be more efficient to access the invariant tables
     * directly but there is no API for that. */
    u_charsToUChars(((char*) context) + offset, &c16, 1);
    return c16;
}

/* Append an escape-free segment of the text; used by u_unescape() */
static void _appendUChars(UChar *dest, int32_t destCapacity,
                          const char *src, int32_t srcLen) {
    if (destCapacity < 0) {
        destCapacity = 0;
    }
    if (srcLen > destCapacity) {
        srcLen = destCapacity;
    }
    u_charsToUChars(src, dest, srcLen);
}

/* Do an invariant conversion of char* -> UChar*, with escape parsing */
U_CAPI int32_t U_EXPORT2
u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
    const char *segment = src;
    int32_t i = 0;
    char c;

    while ((c=*src) != 0) {
        /* '\\' intentionally written as compiler-specific
         * character constant to correspond to compiler-specific
         * char* constants. */
        if (c == '\\') {
            int32_t lenParsed = 0;
            UChar32 c32;
            if (src != segment) {
                if (dest != NULL) {
                    _appendUChars(dest + i, destCapacity - i,
                                  segment, src - segment);
                }
                i += src - segment;
            }
            ++src; /* advance past '\\' */
            c32 = u_unescapeAt(_charPtr_charAt, &lenParsed, uprv_strlen(src), (void*)src);
            if (lenParsed == 0) {
                goto err;
            }
            src += lenParsed; /* advance past escape seq. */
            if (dest != NULL && UTF_CHAR_LENGTH(c32) <= (destCapacity - i)) {
                UTF_APPEND_CHAR_UNSAFE(dest, i, c32);
            } else {
                i += UTF_CHAR_LENGTH(c32);
            }
            segment = src;
        } else {
            ++src;
        }
    }
    if (src != segment) {
        if (dest != NULL) {
            _appendUChars(dest + i, destCapacity - i,
                          segment, src - segment);
        }
        i += src - segment;
    }
    if (dest != NULL && i < destCapacity) {
        dest[i] = 0;
    }
    return i + 1; /* add 1 for zero term */

 err:
    if (dest != NULL && destCapacity > 0) {
        *dest = 0;
    }
    return 0;
}

/* C UGrowBuffer implementation --------------------------------------------- */

U_CAPI UBool /* U_CALLCONV U_EXPORT2 */
u_growBufferFromStatic(void *context,
                       UChar **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
                       int32_t length) {
    UChar *newBuffer=(UChar *)uprv_malloc(reqCapacity*U_SIZEOF_UCHAR);
    if(newBuffer!=NULL) {
        if(length>0) {
            uprv_memcpy(newBuffer, *pBuffer, length*U_SIZEOF_UCHAR);
        }
        *pCapacity=reqCapacity;
    } else {
        *pCapacity=0;
    }

    /* release the old pBuffer if it was not statically allocated */
    if(*pBuffer!=(UChar *)context) {
        uprv_free(*pBuffer);
    }

    *pBuffer=newBuffer;
    return (UBool)(newBuffer!=NULL);
}

/* NUL-termination of strings ----------------------------------------------- */

/**
 * NUL-terminate a string no matter what its type.
 * Set warning and error codes accordingly.
 */
#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode)      \
    if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {                    \
        /* not a public function, so no complete argument checking */   \
                                                                        \
        if(length<0) {                                                  \
            /* assume that the caller handles this */                   \
        } else if(length<destCapacity) {                                \
            /* NUL-terminate the string, the NUL fits */                \
            dest[length]=0;                                             \
            /* unset the not-terminated warning but leave all others */ \
            if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {          \
                *pErrorCode=U_ZERO_ERROR;                               \
            }                                                           \
        } else if(length==destCapacity) {                               \
            /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
            *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;                \
        } else /* length>destCapacity */ {                              \
            /* even the string itself did not fit - set an error code */ \
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;                        \
        }                                                               \
    }

U_CAPI int32_t U_EXPORT2
u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
    return length;
}

U_CAPI int32_t U_EXPORT2
u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
    return length;
}

U_CAPI int32_t U_EXPORT2
u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
    return length;
}

U_CAPI int32_t U_EXPORT2
u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
    return length;
}