scuffed-code/icu4c/source/common/compitr.cpp

/*
**********************************************************************
*   Copyright (C) 1996-1999, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/

#include "dcmpdata.h"

#include "compitr.h"

#include "unicode/normlzr.h"

/**
 * Construct a new <tt>ComposedCharIter</tt> with default behavior.
 * The decomposition threshold is set to <tt>DecompData::MAX_COMPAT</tt>,
 * the same value the two-argument constructor uses for
 * <tt>compat == false</tt> (canonical decompositions only).
 * <p>
 * Note: the <tt>hangul</tt> flag is initialized to <tt>FALSE</tt>, so this
 * iterator does not select characters merely because they lie in the
 * Hangul syllable range, and <tt>getDecomposition</tt> does not take its
 * Hangul-to-Jamo branch.  Use the two-argument constructor to enable
 * Hangul handling.
 * <p>
 * Iteration starts with the first qualifying character after U+0000.
 */
ComposedCharIter::ComposedCharIter()
  : minDecomp(DecompData::MAX_COMPAT), 
    hangul(FALSE),
    curChar(0),
    nextChar(ComposedCharIter::DONE)
{
}


  /**
   * Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.
   * <p>
   * @param compat    <tt>false</tt> for canonical decompositions only;
   *                  <tt>true</tt> for both canonical and compatibility
   *                  decompositions.  Internally this selects the
   *                  decomposition threshold: 0 when <tt>true</tt>,
   *                  <tt>DecompData::MAX_COMPAT</tt> when <tt>false</tt>.
   *
   * @param options   Optional decomposition features.  Currently, the only
   *                  supported option is {@link Normalizer#IGNORE_HANGUL}, which
   *                  causes this <tt>ComposedCharIter</tt> not to iterate
   *                  over the Hangul characters and their corresponding
   *                  Jamo decompositions.  Hangul handling is enabled
   *                  whenever that bit is NOT set in <tt>options</tt>.
   */
ComposedCharIter::ComposedCharIter(UBool compat, 
                   int32_t options)
  : minDecomp(compat ? 0 : DecompData::MAX_COMPAT),
    // hangul == TRUE means "do process Hangul", i.e. IGNORE_HANGUL is absent.
    hangul((options & Normalizer::IGNORE_HANGUL) == 0),
    // Start at U+0000; the search for the next character begins after it.
    curChar(0),
    nextChar(ComposedCharIter::DONE)
{
}

/**
 * Determines whether there are any precomposed Unicode characters not yet
 * returned by {@link #next}.
 *
 * This method is declared const but may run the look-ahead search, which
 * stores the character it finds in <tt>nextChar</tt>; that cached value is
 * then handed out by the following call to <tt>next</tt>.
 *
 * @return non-zero if another character is available, zero otherwise.
 */
UBool ComposedCharIter::hasNext() const {
    // Look-ahead is a cache update, so constness is cast away deliberately.
    ComposedCharIter* self = const_cast<ComposedCharIter*>(this);

    if (self->nextChar == DONE) {
        // Nothing cached yet: search forward from the current character.
        self->findNextChar();
    }
    return self->nextChar != DONE;
}

/**
 * Returns the next precomposed Unicode character and makes it the current
 * character (the one <tt>getDecomposition</tt> operates on).
 * Repeated calls return the qualifying characters in ascending order.
 * Once the search finds nothing more, {@link #hasNext} returns <tt>false</tt>
 * and this method returns {@link #DONE} on every further call.
 *
 * @return the next character, or <tt>DONE</tt> when iteration is finished.
 */
UChar ComposedCharIter::next()
{
    // Use the look-ahead cached by hasNext() if there is one; otherwise search.
    if (nextChar == DONE) {
        findNextChar();
    }

    const UChar produced = nextChar;

    // Consume the look-ahead so the following call searches again.
    nextChar = DONE;
    curChar  = produced;
    return produced;
}

/**
 * Returns the Unicode decomposition of the current character.
 * This method returns the decomposition of the precomposed character most
 * recently returned by {@link #next}.  The resulting decomposition is
 * affected by the <tt>compat</tt> flag and the
 * {@link Normalizer#IGNORE_HANGUL IGNORE_HANGUL} option passed to the
 * constructor.
 *
 * @param result  Receives the decomposition.  Any previous contents are
 *                discarded.  If the current character has no applicable
 *                decomposition, the character itself is stored.
 */
void ComposedCharIter::getDecomposition(UnicodeString& result) const
{
    // We duplicate most of the implementation of Normalizer::decompose() here
    // for efficiency.  One thing we don't duplicate is the recursive
    // decomposition code.  If we detect a need to do recursive decomposition
    // (which happens for only 16 characters in Unicode 3.0) then we delegate to
    // Normalizer::decompose().  This gives us optimal performance without
    // having a complete copy of Normalizer::decompose() here, with its extra
    // baggage of recursion buffers, etc. - Liu

    result.truncate(0);

    // The table entry packs a flag bit (DECOMP_RECURSE) together with the
    // index into DecompData::contents; only the masked index is compared
    // against the threshold.
    const uint16_t offset = ucmp16_getu(DecompData::offsets, curChar);
    const uint16_t index  = (uint16_t)(offset & DecompData::DECOMP_MASK);
    if (index > minDecomp) {
        if ((offset & DecompData::DECOMP_RECURSE) != 0) {
            // Let Normalizer::decompose() handle recursive decomp.
            //
            // The arguments must reproduce the settings this iterator was
            // constructed with:
            //  - minDecomp is 0 exactly when compat was true, so the compat
            //    flag is (minDecomp == 0), not (minDecomp > 0);
            //  - hangul is TRUE exactly when IGNORE_HANGUL was NOT requested,
            //    so the option is passed only when hangul is FALSE.
            UnicodeString temp(curChar);
            // This method has no error channel, so the status is not
            // propagated to the caller.
            UErrorCode status = U_ZERO_ERROR;
            Normalizer::decompose(temp, minDecomp == 0,
                                  hangul ? 0 : Normalizer::IGNORE_HANGUL,
                                  result, status);
        } else {
            Normalizer::doAppend((const UChar*)DecompData::contents, index, result);
        }
    }
    else if (hangul && curChar >= Normalizer::HANGUL_BASE && curChar < Normalizer::HANGUL_LIMIT) {
        // Hangul syllables are not table-driven; delegate to the Normalizer helper.
        Normalizer::hangulToJamo(curChar, result, (uint16_t)minDecomp);
    }
    else {
        // No applicable decomposition: the character stands for itself.
        result += curChar;
    }
}

void ComposedCharIter::findNextChar()
{
    if (curChar != DONE) {
        UChar ch = curChar;
        while (++ch < 0xFFFF) {
            UChar offset = ucmp16_getu(DecompData::offsets, ch);
            if (offset > minDecomp
                || (hangul && ch >= Normalizer::HANGUL_BASE && ch < Normalizer::HANGUL_LIMIT) ) {
                nextChar = ch;
                break;
            }
        }
    }
}
Initial revision X-SVN-Rev: 2 1999-08-16 21:50:52 +00:00			`/*`
ICU-161 (c)opyright notices X-SVN-Rev: 362 1999-12-09 23:27:55 +00:00			`**********************************************************************`
			`* Copyright (C) 1996-1999, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
			`**********************************************************************`
Initial revision X-SVN-Rev: 2 1999-08-16 21:50:52 +00:00			`*/`

			`#include "dcmpdata.h"`

			`#include "compitr.h"`

ICU-12 all public include files are now in unicode dir, all private icu_ functions renamed to uprv_ X-SVN-Rev: 472 1999-12-28 23:39:02 +00:00			`#include "unicode/normlzr.h"`
Initial revision X-SVN-Rev: 2 1999-08-16 21:50:52 +00:00
			`/**`
			`* Construct a new <tt>ComposedCharIter</tt>. The iterator will return`
			`* all Unicode characters with canonical decompositions, including Korean`
			`* Hangul characters.`
			`*/`
			`ComposedCharIter::ComposedCharIter()`
			`: minDecomp(DecompData::MAX_COMPAT),`
			`hangul(FALSE),`
			`curChar(0),`
			`nextChar(ComposedCharIter::DONE)`
			`{`
			`}`


			`/**`
			`* Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.`
			`* <p>`
			`* @param compat <tt>false</tt> for canonical decompositions only;`
			`* <tt>true</tt> for both canonical and compatibility`
			`* decompositions.`
			`*`
			`* @param options Optional decomposition features. Currently, the only`
			`* supported option is {@link Normalizer#IGNORE_HANGUL}, which`
			`* causes this <tt>ComposedCharIter</tt> not to iterate`
			`* over the Hangul characters and their corresponding`
			`* Jamo decompositions.`
			`*/`
ICU-351 Define UBool to be used in the APIs. X-SVN-Rev: 1410 2000-05-18 22:08:39 +00:00			`ComposedCharIter::ComposedCharIter(UBool compat,`
Initial revision X-SVN-Rev: 2 1999-08-16 21:50:52 +00:00			`int32_t options)`
			`: minDecomp(compat ? 0 : DecompData::MAX_COMPAT),`
			`hangul((options & Normalizer::IGNORE_HANGUL) == 0),`
			`curChar(0),`
			`nextChar(ComposedCharIter::DONE)`
			`{`
			`}`

			`/**`
			`* Determines whether there any precomposed Unicode characters not yet returned`
			`* by {@link #next}.`
			`*/`
ICU-351 Define UBool to be used in the APIs. X-SVN-Rev: 1410 2000-05-18 22:08:39 +00:00			`UBool ComposedCharIter::hasNext() const {`
Initial revision X-SVN-Rev: 2 1999-08-16 21:50:52 +00:00			`if (nextChar == DONE) {`
			`((ComposedCharIter*)this)->findNextChar();`
			`}`
			`return nextChar != DONE;`
			`}`

			`/**`
			`* Returns the next precomposed Unicode character.`
			`* Repeated calls to <tt>next</tt> return all of the precomposed characters defined`
			`* by Unicode, in ascending order. After all precomposed characters have`
			`* been returned, {@link #hasNext} will return <tt>false</tt> and further calls`
			`* to <tt>next</tt> will return {@link #DONE}.`
			`*/`
			`UChar ComposedCharIter::next()`
			`{`
			`if (nextChar == DONE) {`
			`findNextChar();`
			`}`
			`curChar = nextChar;`
			`nextChar = DONE;`
			`return curChar;`
			`}`

			`/**`
			`* Returns the Unicode decomposition of the current character.`
			`* This method returns the decomposition of the precomposed character most`
			`* recently returned by {@link #next}. The resulting decomposition is`
			`* affected by the settings of the`
			`* {@link Normalizer#COMPATIBILITY COMPATIBILITY}`
			`* and {@link Normalizer#NO_HANGUL NO_HANGUL} options passed to the constructor.`
			`*/`
			`void ComposedCharIter::getDecomposition(UnicodeString& result) const`
			`{`
ICU-89 fix handling of recursively decomposing chars X-SVN-Rev: 2089 2000-08-01 20:29:15 +00:00			`// We duplicate most of the implementation of Normalizer::decompose() here`
			`// for efficiency. One thing we don't duplicate is the recursive`
			`// decomposition code. If we detect a need to do recursive decomposition`
			`// (which happens for only 16 characters in Unicode 3.0) then we delegate to`
			`// Normalizer::decompose(). This gives us optimal performance without`
			`// having a complete copy of Normalizer::decompose() here, with its extra`
			`// baggage of recursion buffers, etc. - Liu`

Initial revision X-SVN-Rev: 2 1999-08-16 21:50:52 +00:00			`result.truncate(0);`

ICU-89 fix handling of recursively decomposing chars X-SVN-Rev: 2089 2000-08-01 20:29:15 +00:00			`uint16_t offset = ucmp16_getu(DecompData::offsets, curChar);`
ICU-535 fixed some compiler warnings X-SVN-Rev: 2182 2000-08-11 02:08:59 +00:00			`uint16_t index = (uint16_t)(offset & DecompData::DECOMP_MASK);`
ICU-89 fix handling of recursively decomposing chars X-SVN-Rev: 2089 2000-08-01 20:29:15 +00:00			`if (index > minDecomp) {`
			`if ((offset & DecompData::DECOMP_RECURSE) != 0) {`
			`// Let Normalizer::decompose() handle recursive decomp`
			`UnicodeString temp(curChar);`
			`UErrorCode status = U_ZERO_ERROR;`
			`Normalizer::decompose(temp, minDecomp > 0,`
			`hangul ? Normalizer::IGNORE_HANGUL : 0,`
			`result, status);`
			`} else {`
			`Normalizer::doAppend((const UChar*)DecompData::contents, index, result);`
			`}`
Initial revision X-SVN-Rev: 2 1999-08-16 21:50:52 +00:00			`}`
			`else if (hangul && curChar >= Normalizer::HANGUL_BASE && curChar < Normalizer::HANGUL_LIMIT) {`
ICU-163 Fixed warnings on MSVC 6.0. X-SVN-Rev: 476 1999-12-29 22:33:47 +00:00			`Normalizer::hangulToJamo(curChar, result, (uint16_t)minDecomp);`
Initial revision X-SVN-Rev: 2 1999-08-16 21:50:52 +00:00			`}`
			`else {`
			`result += curChar;`
			`}`
			`}`

			`void ComposedCharIter::findNextChar()`
			`{`
			`if (curChar != DONE) {`
			`UChar ch = curChar;`
			`while (++ch < 0xFFFF) {`
			`UChar offset = ucmp16_getu(DecompData::offsets, ch);`
			`if (offset > minDecomp`
			`\|\| (hangul && ch >= Normalizer::HANGUL_BASE && ch < Normalizer::HANGUL_LIMIT) ) {`
			`nextChar = ch;`
			`break;`
			`}`
			`}`
			`}`
			`}`